naming convention and running tests

Author: Fabian
Date:   2022-07-11 16:18:18 +02:00
Parent: 786da2290d
Commit: ade83b5ae6

13 changed files with 621 additions and 590 deletions

@ -113,7 +113,7 @@ print("OpenAI Gym MP tasks:")
print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS) print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
print("Deepmind Control MP tasks:") print("Deepmind Control MP tasks:")
print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS) print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("MetaWorld MP tasks:") print("MetaWorld MP tasks:")
print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS) print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS)
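As a quick sanity check, the collections printed above can be iterated to instantiate every registered task. A minimal sketch, assuming the renamed ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS dictionary from this commit and the package's make helper:

    import alr_envs

    # Each entry maps a trajectory generator type ("DMP"/"ProMP") to a list of env ids.
    for traj_gen_type, env_ids in alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
        print(traj_gen_type, len(env_ids), "environments")
        for env_id in env_ids[:1]:  # instantiate one env per type as a smoke test
            env = alr_envs.make(env_id, seed=1)
            env.reset()
            env.close()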

@ -1,15 +1,14 @@
from alr_envs import dmc, meta, open_ai from alr_envs import dmc, meta, open_ai
from alr_envs.utils import make_dmc
from alr_envs.utils.make_env_helpers import make, make_bb, make_rank from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
# Convenience function for all MP environments # Convenience function for all MP environments
from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
from .dmc import ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
ALL_MOTION_PRIMITIVE_ENVIRONMENTS = { ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
key: value + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS[key] + key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key] ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key]
for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()} for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()}
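The comprehension above assumes every framework dictionary exposes the same keys ("DMP", "ProMP") and simply concatenates the id lists per key. A minimal illustration of that merge:

    alr = {"DMP": ["a_dmp"], "ProMP": ["a_promp"]}
    dmc = {"DMP": ["dmc_dmp"], "ProMP": ["dmc_promp"]}

    merged = {key: value + dmc[key] for key, value in alr.items()}
    assert merged == {"DMP": ["a_dmp", "dmc_dmp"], "ProMP": ["a_promp", "dmc_promp"]}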

@ -1,4 +1,7 @@
from typing import Tuple, Union, Optional import os
os.environ["MUJOCO_GL"] = "egl"
from typing import Tuple, Optional
import gym import gym
import numpy as np import numpy as np
@ -67,7 +70,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
def observation(self, observation): def observation(self, observation):
# return context space if we are # return context space if we are
obs = observation[self.env.context_mask] if self.return_context_observation else observation mask = self.env.context_mask
if self.is_time_aware:
mask = np.append(mask, False)
obs = observation[mask] if self.return_context_observation else observation
# cast dtype because metaworld returns incorrect that throws gym error # cast dtype because metaworld returns incorrect that throws gym error
return obs.astype(self.observation_space.dtype) return obs.astype(self.observation_space.dtype)
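The new masking keeps the context mask aligned with gym's TimeAwareObservation, which appends the elapsed time as the last observation entry. A small sketch of the logic (assuming a boolean context_mask and that the time feature should never be part of the context):

    import numpy as np

    context_mask = np.array([True, False, True])   # which raw entries form the context
    observation = np.array([0.1, 0.2, 0.3, 7.0])   # last entry: time step appended by TimeAwareObservation

    mask = np.append(context_mask, False)          # never expose the time feature as context
    assert np.allclose(observation[mask], [0.1, 0.3])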

@ -2,7 +2,7 @@ from copy import deepcopy
from . import manipulation, suite from . import manipulation, suite
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
from gym.envs.registration import register from gym.envs.registration import register
@ -47,10 +47,9 @@ DEFAULT_BB_DICT_DMP = {
} }
} }
# DeepMind Control Suite (DMC) # DeepMind Control Suite (DMC)
kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_bic_dmp['name'] = f"ball_in_cup-catch" kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper) kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper)
kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1 kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1
@ -58,304 +57,313 @@ kwargs_dict_bic_dmp['controller_kwargs']['p_gains'] = 50
kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1 kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1
register( register(
id=f'dmc_ball_in_cup-catch_dmp-v0', id=f'dmc_ball_in_cup-catch_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_bic_dmp
"name": f"ball_in_cup-catch", # {
"time_limit": 20, # "name": f"ball_in_cup-catch",
"episode_length": 1000, # "time_limit": 20,
"wrappers": [suite.ball_in_cup.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.ball_in_cup.MPWrapper],
"num_dof": 2, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 2,
"duration": 20, # "num_basis": 5,
"learn_goal": True, # "duration": 20,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "motor", # "bandwidth_factor": 2,
"goal_scale": 0.1, # "policy_type": "motor",
"policy_kwargs": { # "goal_scale": 0.1,
"p_gains": 50, # "policy_kwargs": {
"d_gains": 1 # "p_gains": 50,
} # "d_gains": 1
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_bic_promp['name'] = f"ball_in_cup-catch" kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper) kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50 kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50
kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1 kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1
register( register(
id=f'dmc_ball_in_cup-catch_promp-v0', id=f'dmc_ball_in_cup-catch_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_bic_promp
"name": f"ball_in_cup-catch", # {
"time_limit": 20, # "name": f"ball_in_cup-catch",
"episode_length": 1000, # "time_limit": 20,
"wrappers": [suite.ball_in_cup.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.ball_in_cup.MPWrapper],
"num_dof": 2, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 2,
"duration": 20, # "num_basis": 5,
"policy_type": "motor", # "duration": 20,
"zero_start": True, # "policy_type": "motor",
"policy_kwargs": { # "zero_start": True,
"p_gains": 50, # "policy_kwargs": {
"d_gains": 1 # "p_gains": 50,
} # "d_gains": 1
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_easy_dmp['name'] = f"reacher-easy" kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50 kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1 kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1
register( register(
id=f'dmc_reacher-easy_dmp-v0', id=f'dmc_reacher-easy_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_bic_dmp
"name": f"reacher-easy", # {
"time_limit": 20, # "name": f"reacher-easy",
"episode_length": 1000, # "time_limit": 20,
"wrappers": [suite.reacher.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.reacher.MPWrapper],
"num_dof": 2, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 2,
"duration": 20, # "num_basis": 5,
"learn_goal": True, # "duration": 20,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "motor", # "bandwidth_factor": 2,
"weights_scale": 50, # "policy_type": "motor",
"goal_scale": 0.1, # "weights_scale": 50,
"policy_kwargs": { # "goal_scale": 0.1,
"p_gains": 50, # "policy_kwargs": {
"d_gains": 1 # "p_gains": 50,
} # "d_gains": 1
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_easy_promp['name'] = f"reacher-easy" kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50 kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1 kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1
kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register( register(
id=f'dmc_reacher-easy_promp-v0', id=f'dmc_reacher-easy_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_reacher_easy_promp
"name": f"reacher-easy", # {
"time_limit": 20, # "name": f"reacher-easy",
"episode_length": 1000, # "time_limit": 20,
"wrappers": [suite.reacher.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.reacher.MPWrapper],
"num_dof": 2, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 2,
"duration": 20, # "num_basis": 5,
"policy_type": "motor", # "duration": 20,
"weights_scale": 0.2, # "policy_type": "motor",
"zero_start": True, # "weights_scale": 0.2,
"policy_kwargs": { # "zero_start": True,
"p_gains": 50, # "policy_kwargs": {
"d_gains": 1 # "p_gains": 50,
} # "d_gains": 1
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_hard_dmp['name'] = f"reacher-hard" kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard"
kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50 kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1 kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1
register( register(
id=f'dmc_reacher-hard_dmp-v0', id=f'dmc_reacher-hard_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_reacher_hard_dmp
"name": f"reacher-hard", # {
"time_limit": 20, # "name": f"reacher-hard",
"episode_length": 1000, # "time_limit": 20,
"wrappers": [suite.reacher.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.reacher.MPWrapper],
"num_dof": 2, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 2,
"duration": 20, # "num_basis": 5,
"learn_goal": True, # "duration": 20,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "motor", # "bandwidth_factor": 2,
"weights_scale": 50, # "policy_type": "motor",
"goal_scale": 0.1, # "weights_scale": 50,
"policy_kwargs": { # "goal_scale": 0.1,
"p_gains": 50, # "policy_kwargs": {
"d_gains": 1 # "p_gains": 50,
} # "d_gains": 1
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_hard_promp['name'] = f"reacher-hard" kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard"
kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper) kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50 kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1 kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1
kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register( register(
id=f'dmc_reacher-hard_promp-v0', id=f'dmc_reacher-hard_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_reacher_hard_promp
"name": f"reacher-hard", # {
"time_limit": 20, # "name": f"reacher-hard",
"episode_length": 1000, # "time_limit": 20,
"wrappers": [suite.reacher.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.reacher.MPWrapper],
"num_dof": 2, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 2,
"duration": 20, # "num_basis": 5,
"policy_type": "motor", # "duration": 20,
"weights_scale": 0.2, # "policy_type": "motor",
"zero_start": True, # "weights_scale": 0.2,
"policy_kwargs": { # "zero_start": True,
"p_gains": 50, # "policy_kwargs": {
"d_gains": 1 # "p_gains": 50,
} # "d_gains": 1
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
_dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"]
for _task in _dmc_cartpole_tasks: for _task in _dmc_cartpole_tasks:
_env_id = f'dmc_cartpole-{_task}_dmp-v0' _env_id = f'dmc_cartpole-{_task}_dmp-v0'
kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole_dmp['name'] = f"cartpole-{_task}" kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}"
kwargs_dict_cartpole_dmp['camera_id'] = 0
kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper) kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper)
kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 kwargs_dict_cartpole_dmp['trajectory_generator_kwargs'][
'weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_cartpole_dmp
"name": f"cartpole-{_task}", # {
# "time_limit": 1, # "name": f"cartpole-{_task}",
"camera_id": 0, # # "time_limit": 1,
"episode_length": 1000, # "camera_id": 0,
"wrappers": [suite.cartpole.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.cartpole.MPWrapper],
"num_dof": 1, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 1,
"duration": 10, # "num_basis": 5,
"learn_goal": True, # "duration": 10,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "motor", # "bandwidth_factor": 2,
"weights_scale": 50, # "policy_type": "motor",
"goal_scale": 0.1, # "weights_scale": 50,
"policy_kwargs": { # "goal_scale": 0.1,
"p_gains": 10, # "policy_kwargs": {
"d_gains": 10 # "p_gains": 10,
} # "d_gains": 10
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'dmc_cartpole-{_task}_promp-v0' _env_id = f'dmc_cartpole-{_task}_promp-v0'
kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole_promp['name'] = f"cartpole-{_task}" kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}"
kwargs_dict_cartpole_promp['camera_id'] = 0
kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper) kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper)
kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10
kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_cartpole_promp
"name": f"cartpole-{_task}", # {
# "time_limit": 1, # "name": f"cartpole-{_task}",
"camera_id": 0, # # "time_limit": 1,
"episode_length": 1000, # "camera_id": 0,
"wrappers": [suite.cartpole.MPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.cartpole.MPWrapper],
"num_dof": 1, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 1,
"duration": 10, # "num_basis": 5,
"policy_type": "motor", # "duration": 10,
"weights_scale": 0.2, # "policy_type": "motor",
"zero_start": True, # "weights_scale": 0.2,
"policy_kwargs": { # "zero_start": True,
"p_gains": 10, # "policy_kwargs": {
"d_gains": 10 # "p_gains": 10,
} # "d_gains": 10
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole2poles_dmp['name'] = f"cartpole-two_poles" kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles"
kwargs_dict_cartpole2poles_dmp['camera_id'] = 0
kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper) kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-two_poles_dmp-v0' _env_id = f'dmc_cartpole-two_poles_dmp-v0'
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_cartpole2poles_dmp
"name": f"cartpole-two_poles", # {
# "time_limit": 1, # "name": f"cartpole-two_poles",
"camera_id": 0, # # "time_limit": 1,
"episode_length": 1000, # "camera_id": 0,
"wrappers": [suite.cartpole.TwoPolesMPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.cartpole.TwoPolesMPWrapper],
"num_dof": 1, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 1,
"duration": 10, # "num_basis": 5,
"learn_goal": True, # "duration": 10,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "motor", # "bandwidth_factor": 2,
"weights_scale": 50, # "policy_type": "motor",
"goal_scale": 0.1, # "weights_scale": 50,
"policy_kwargs": { # "goal_scale": 0.1,
"p_gains": 10, # "policy_kwargs": {
"d_gains": 10 # "p_gains": 10,
} # "d_gains": 10
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole2poles_promp['name'] = f"cartpole-two_poles" kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles"
kwargs_dict_cartpole2poles_promp['camera_id'] = 0
kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper) kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10
@ -363,70 +371,71 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
_env_id = f'dmc_cartpole-two_poles_promp-v0' _env_id = f'dmc_cartpole-two_poles_promp-v0'
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_cartpole2poles_promp
"name": f"cartpole-two_poles", # {
# "time_limit": 1, # "name": f"cartpole-two_poles",
"camera_id": 0, # # "time_limit": 1,
"episode_length": 1000, # "camera_id": 0,
"wrappers": [suite.cartpole.TwoPolesMPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.cartpole.TwoPolesMPWrapper],
"num_dof": 1, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 1,
"duration": 10, # "num_basis": 5,
"policy_type": "motor", # "duration": 10,
"weights_scale": 0.2, # "policy_type": "motor",
"zero_start": True, # "weights_scale": 0.2,
"policy_kwargs": { # "zero_start": True,
"p_gains": 10, # "policy_kwargs": {
"d_gains": 10 # "p_gains": 10,
} # "d_gains": 10
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole3poles_dmp['name'] = f"cartpole-three_poles" kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles"
kwargs_dict_cartpole3poles_dmp['camera_id'] = 0
kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper) kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-three_poles_dmp-v0' _env_id = f'dmc_cartpole-three_poles_dmp-v0'
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_cartpole3poles_dmp
"name": f"cartpole-three_poles", # {
# "time_limit": 1, # "name": f"cartpole-three_poles",
"camera_id": 0, # # "time_limit": 1,
"episode_length": 1000, # "camera_id": 0,
"wrappers": [suite.cartpole.ThreePolesMPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.cartpole.ThreePolesMPWrapper],
"num_dof": 1, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 1,
"duration": 10, # "num_basis": 5,
"learn_goal": True, # "duration": 10,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "motor", # "bandwidth_factor": 2,
"weights_scale": 50, # "policy_type": "motor",
"goal_scale": 0.1, # "weights_scale": 50,
"policy_kwargs": { # "goal_scale": 0.1,
"p_gains": 10, # "policy_kwargs": {
"d_gains": 10 # "p_gains": 10,
} # "d_gains": 10
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole3poles_promp['name'] = f"cartpole-three_poles" kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles"
kwargs_dict_cartpole3poles_promp['camera_id'] = 0
kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper) kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10 kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10 kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10
@ -434,81 +443,85 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
_env_id = f'dmc_cartpole-three_poles_promp-v0' _env_id = f'dmc_cartpole-three_poles_promp-v0'
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_cartpole3poles_promp
"name": f"cartpole-three_poles", # {
# "time_limit": 1, # "name": f"cartpole-three_poles",
"camera_id": 0, # # "time_limit": 1,
"episode_length": 1000, # "camera_id": 0,
"wrappers": [suite.cartpole.ThreePolesMPWrapper], # "episode_length": 1000,
"traj_gen_kwargs": { # "wrappers": [suite.cartpole.ThreePolesMPWrapper],
"num_dof": 1, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 1,
"duration": 10, # "num_basis": 5,
"policy_type": "motor", # "duration": 10,
"weights_scale": 0.2, # "policy_type": "motor",
"zero_start": True, # "weights_scale": 0.2,
"policy_kwargs": { # "zero_start": True,
"p_gains": 10, # "policy_kwargs": {
"d_gains": 10 # "p_gains": 10,
} # "d_gains": 10
} # }
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# DeepMind Manipulation # DeepMind Manipulation
kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_mani_reach_site_features_dmp['name'] = f"manipulation-reach_site_features" kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper) kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper)
kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2 kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity' kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
register( register(
id=f'dmc_manipulation-reach_site_dmp-v0', id=f'dmc_manipulation-reach_site_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs=kwargs_dict_mani_reach_site_features_dmp
"name": f"manipulation-reach_site_features", # {
# "time_limit": 1, # "name": f"manipulation-reach_site_features",
"episode_length": 250, # # "time_limit": 1,
"wrappers": [manipulation.reach_site.MPWrapper], # "episode_length": 250,
"traj_gen_kwargs": { # "wrappers": [manipulation.reach_site.MPWrapper],
"num_dof": 9, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 9,
"duration": 10, # "num_basis": 5,
"learn_goal": True, # "duration": 10,
"alpha_phase": 2, # "learn_goal": True,
"bandwidth_factor": 2, # "alpha_phase": 2,
"policy_type": "velocity", # "bandwidth_factor": 2,
"weights_scale": 50, # "policy_type": "velocity",
"goal_scale": 0.1, # "weights_scale": 50,
} # "goal_scale": 0.1,
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP) kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_mani_reach_site_features_promp['name'] = f"manipulation-reach_site_features" kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper) kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper)
kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2 kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity' kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
register( register(
id=f'dmc_manipulation-reach_site_promp-v0', id=f'dmc_manipulation-reach_site_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs={ kwargs=kwargs_dict_mani_reach_site_features_promp
"name": f"manipulation-reach_site_features", # {
# "time_limit": 1, # "name": f"manipulation-reach_site_features",
"episode_length": 250, # # "time_limit": 1,
"wrappers": [manipulation.reach_site.MPWrapper], # "episode_length": 250,
"traj_gen_kwargs": { # "wrappers": [manipulation.reach_site.MPWrapper],
"num_dof": 9, # "traj_gen_kwargs": {
"num_basis": 5, # "num_dof": 9,
"duration": 10, # "num_basis": 5,
"policy_type": "velocity", # "duration": 10,
"weights_scale": 0.2, # "policy_type": "velocity",
"zero_start": True, # "weights_scale": 0.2,
} # "zero_start": True,
} # }
# }
) )
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0") ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
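The registered ids themselves are unchanged by this commit (only the entry point and the dmc: name prefix change), so existing training code keeps working. A hedged usage sketch for one of the ProMP variants above:

    import alr_envs

    # One black-box action corresponds to a full trajectory in the underlying DMC task.
    env = alr_envs.make('dmc_ball_in_cup-catch_promp-v0', seed=1)
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()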

@ -2,17 +2,22 @@
# License: MIT # License: MIT
# Copyright (c) 2020 Denis Yarats # Copyright (c) 2020 Denis Yarats
import collections import collections
from typing import Any, Dict, Tuple from collections.abc import MutableMapping
from typing import Any, Dict, Tuple, Optional, Union, Callable
from dm_control import composer
import gym
import numpy as np import numpy as np
from dm_control import manipulation, suite from dm_control.rl import control
from dm_env import specs from dm_env import specs
from gym import core, spaces from gym import spaces
from gym.core import ObsType
def _spec_to_box(spec): def _spec_to_box(spec):
def extract_min_max(s): def extract_min_max(s):
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found" assert s.dtype == np.float64 or s.dtype == np.float32, \
f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
dim = int(np.prod(s.shape)) dim = int(np.prod(s.shape))
if type(s) == specs.Array: if type(s) == specs.Array:
bound = np.inf * np.ones(dim, dtype=s.dtype) bound = np.inf * np.ones(dim, dtype=s.dtype)
@ -32,7 +37,7 @@ def _spec_to_box(spec):
return spaces.Box(low, high, dtype=s.dtype) return spaces.Box(low, high, dtype=s.dtype)
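For reference, _spec_to_box turns a dm_env spec into a gym Box with matching shape and dtype. A minimal sketch for a single BoundedArray, assuming the same broadcast-to-flattened-dimension behaviour as the function above:

    import numpy as np
    from dm_env import specs
    from gym import spaces

    spec = specs.BoundedArray(shape=(2,), dtype=np.float32, minimum=-1.0, maximum=1.0)
    dim = int(np.prod(spec.shape))
    low = np.broadcast_to(spec.minimum, dim).astype(spec.dtype)
    high = np.broadcast_to(spec.maximum, dim).astype(spec.dtype)
    box = spaces.Box(low, high, dtype=spec.dtype)  # Box(2,) with bounds [-1, 1]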
def _flatten_obs(obs: collections.MutableMapping): def _flatten_obs(obs: MutableMapping):
""" """
Flattens an observation of type MutableMapping, e.g. a dict to a 1D array. Flattens an observation of type MutableMapping, e.g. a dict to a 1D array.
Args: Args:
@ -42,7 +47,7 @@ def _flatten_obs(obs: collections.MutableMapping):
""" """
if not isinstance(obs, collections.MutableMapping): if not isinstance(obs, MutableMapping):
raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.') raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
# Keep key order consistent for non OrderedDicts # Keep key order consistent for non OrderedDicts
@ -52,47 +57,19 @@ def _flatten_obs(obs: collections.MutableMapping):
return np.concatenate(obs_vals) return np.concatenate(obs_vals)
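A short example of what _flatten_obs produces for a typical dict observation (simplified; the real helper also fixes the key order for plain dicts):

    import collections
    import numpy as np

    obs = collections.OrderedDict(position=np.array([0.1, 0.2]), velocity=np.array([0.3]))
    flat = np.concatenate([np.atleast_1d(v).ravel() for v in obs.values()])
    assert flat.shape == (3,)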
class DMCWrapper(core.Env): class DMCWrapper(gym.Env):
def __init__( def __init__(self,
self, env: Callable[[], Union[composer.Environment, control.Environment]],
domain_name: str,
task_name: str,
task_kwargs: dict = {},
visualize_reward: bool = True,
from_pixels: bool = False,
height: int = 84,
width: int = 84,
camera_id: int = 0,
frame_skip: int = 1,
environment_kwargs: dict = None,
channels_first: bool = True
): ):
assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.'
self._from_pixels = from_pixels
self._height = height
self._width = width
self._camera_id = camera_id
self._frame_skip = frame_skip
self._channels_first = channels_first
# create task # TODO: Currently this is required to be a function because dmc does not allow to copy composer environments
if domain_name == "manipulation": self._env = env()
assert not from_pixels and not task_name.endswith("_vision"), \
"TODO: Vision interface for manipulation is different to suite and needs to be implemented"
self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random'])
else:
self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
visualize_reward=visualize_reward, environment_kwargs=environment_kwargs)
# action and observation space # action and observation space
self._action_space = _spec_to_box([self._env.action_spec()]) self._action_space = _spec_to_box([self._env.action_spec()])
self._observation_space = _spec_to_box(self._env.observation_spec().values()) self._observation_space = _spec_to_box(self._env.observation_spec().values())
self._last_state = None self._window = None
self.viewer = None
# set seed
self.seed(seed=task_kwargs.get('random', 1))
def __getattr__(self, item): def __getattr__(self, item):
"""Propagate only non-existent properties to wrapped env.""" """Propagate only non-existent properties to wrapped env."""
@ -103,16 +80,6 @@ class DMCWrapper(core.Env):
return getattr(self._env, item) return getattr(self._env, item)
def _get_obs(self, time_step): def _get_obs(self, time_step):
if self._from_pixels:
obs = self.render(
mode="rgb_array",
height=self._height,
width=self._width,
camera_id=self._camera_id
)
if self._channels_first:
obs = obs.transpose(2, 0, 1).copy()
else:
obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype) obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
return obs return obs
@ -126,20 +93,7 @@ class DMCWrapper(core.Env):
@property @property
def dt(self): def dt(self):
return self._env.control_timestep() * self._frame_skip return self._env.control_timestep()
@property
def base_step_limit(self):
"""
Returns: max_episode_steps of the underlying DMC env
"""
# Accessing private attribute because DMC does not expose time_limit or step_limit.
# Only the current time_step/time as well as the control_timestep can be accessed.
try:
return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip
except AttributeError as e:
return self._env._time_limit / self.dt
def seed(self, seed=None): def seed(self, seed=None):
self._action_space.seed(seed) self._action_space.seed(seed)
@ -147,56 +101,71 @@ class DMCWrapper(core.Env):
def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]: def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
assert self._action_space.contains(action) assert self._action_space.contains(action)
reward = 0
extra = {'internal_state': self._env.physics.get_state().copy()} extra = {'internal_state': self._env.physics.get_state().copy()}
for _ in range(self._frame_skip):
time_step = self._env.step(action) time_step = self._env.step(action)
reward += time_step.reward or 0. reward = time_step.reward or 0.
done = time_step.last() done = time_step.last()
if done:
break
self._last_state = _flatten_obs(time_step.observation)
obs = self._get_obs(time_step) obs = self._get_obs(time_step)
extra['discount'] = time_step.discount extra['discount'] = time_step.discount
return obs, reward, done, extra return obs, reward, done, extra
def reset(self) -> np.ndarray: def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
time_step = self._env.reset() time_step = self._env.reset()
self._last_state = _flatten_obs(time_step.observation)
obs = self._get_obs(time_step) obs = self._get_obs(time_step)
return obs return obs
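Since the wrapper now receives a factory callable instead of domain/task names, construction moves to the caller. A hedged sketch, assuming dm_control's suite loader and the import path used by the old registration code:

    from dm_control import suite
    from alr_envs.dmc.dmc_wrapper import DMCWrapper  # path as in the previous registration; assumption

    # A zero-argument callable is required because composer environments cannot be copied.
    env = DMCWrapper(lambda: suite.load(domain_name="reacher", task_name="easy",
                                        task_kwargs={"random": 1}))
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())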
def render(self, mode='rgb_array', height=None, width=None, camera_id=0): def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False,
if self._last_state is None: segmentation=False, scene_option=None, render_flag_overrides=None):
raise ValueError('Environment not ready to render. Call reset() first.')
camera_id = camera_id or self._camera_id
# assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
if mode == "rgb_array": if mode == "rgb_array":
height = height or self._height return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays,
width = width or self._width depth=depth, segmentation=segmentation, scene_option=scene_option,
return self._env.physics.render(height=height, width=width, camera_id=camera_id) render_flag_overrides=render_flag_overrides)
elif mode == 'human':
if self.viewer is None:
# pylint: disable=import-outside-toplevel
# pylint: disable=g-import-not-at-top
from gym.envs.classic_control import rendering
self.viewer = rendering.SimpleImageViewer()
# Render max available buffer size. Larger is only possible by altering the XML. # Render max available buffer size. Larger is only possible by altering the XML.
img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight, img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
width=self._env.physics.model.vis.global_.offwidth, width=self._env.physics.model.vis.global_.offwidth,
camera_id=camera_id) camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation,
self.viewer.imshow(img) scene_option=scene_option, render_flag_overrides=render_flag_overrides)
return self.viewer.isopen
if depth:
img = np.dstack([img.astype(np.uint8)] * 3)
if mode == 'human':
try:
import cv2
if self._window is None:
self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE)
cv2.imshow(self.id, img[..., ::-1]) # Image in BGR
cv2.waitKey(1)
except ImportError:
import pygame
img = img.transpose((1, 0, 2))
if self._window is None:
pygame.init()
pygame.display.init()
self._window = pygame.display.set_mode(img.shape[:2])
self._window.blit(pygame.surfarray.make_surface(img), (0, 0))
pygame.event.pump()
pygame.display.flip()
def close(self): def close(self):
super().close() super().close()
if self.viewer is not None and self.viewer.isopen: if self._window is not None:
self.viewer.close() try:
import cv2
cv2.destroyWindow(self.id)
except ImportError:
import pygame
pygame.display.quit()
pygame.quit()
@property @property
def reward_range(self) -> Tuple[float, float]: def reward_range(self) -> Tuple[float, float]:
@ -204,3 +173,8 @@ class DMCWrapper(core.Env):
if isinstance(reward_spec, specs.BoundedArray): if isinstance(reward_spec, specs.BoundedArray):
return reward_spec.minimum, reward_spec.maximum return reward_spec.minimum, reward_spec.maximum
return -float('inf'), float('inf') return -float('inf'), float('inf')
@property
def metadata(self):
return {'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': round(1.0 / self._env.control_timestep())}

@ -1,3 +1,5 @@
import numpy as np
import alr_envs import alr_envs
@ -59,7 +61,8 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
""" """
# Changing the arguments of the black box env is possible by providing them to gym as with all kwargs. # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
# E.g. here for way to many basis functions # E.g. here for way to many basis functions
env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000}) # env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
env = alr_envs.make(env_name, seed)
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}}) # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}}) # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
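The commented-out line above shows the intended pattern: any registered kwarg can be overridden at make() time because the helper performs a nested update of the spec kwargs. A hedged sketch (the num_basis value is arbitrary):

    import alr_envs

    env = alr_envs.make("Reacher5dProMP-v0", seed=1,
                        basis_generator_kwargs={'num_basis': 10})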
@ -72,15 +75,16 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
# number of samples/full trajectories (multiple environment steps) # number of samples/full trajectories (multiple environment steps)
for i in range(iterations): for i in range(iterations):
ac = env.action_space.sample() * 1000 ac = env.action_space.sample()
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
rewards += reward rewards += reward
if done: if done:
print(rewards) print(i, rewards)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
print(obs)
return obs
def example_fully_custom_mp(seed=1, iterations=1, render=True): def example_fully_custom_mp(seed=1, iterations=1, render=True):
@ -139,7 +143,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
if __name__ == '__main__': if __name__ == '__main__':
render = True render = False
# # DMP # # DMP
# example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
# #
@ -150,7 +154,7 @@ if __name__ == '__main__':
# example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
# Altered basis functions # Altered basis functions
example_custom_mp("HopperJumpSparseProMP-v0", seed=10, iterations=10, render=render) obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
# Custom MP # Custom MP
# example_fully_custom_mp(seed=10, iterations=1, render=render) # example_fully_custom_mp(seed=10, iterations=1, render=render)

@ -36,7 +36,7 @@ for _task in _goal_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}' _env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper) kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper)
kwargs_dict_goal_change_promp['name'] = _task kwargs_dict_goal_change_promp['name'] = f'metaworld:{_task}'
register( register(
id=_env_id, id=_env_id,
@ -52,7 +52,7 @@ for _task in _object_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}' _env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper) kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper)
kwargs_dict_object_change_promp['name'] = _task kwargs_dict_object_change_promp['name'] = f'metaworld:{_task}'
register( register(
id=_env_id, id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
@ -77,7 +77,7 @@ for _task in _goal_and_object_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}' _env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper) kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper)
kwargs_dict_goal_and_object_change_promp['name'] = _task kwargs_dict_goal_and_object_change_promp['name'] = f'metaworld:{_task}'
register( register(
id=_env_id, id=_env_id,
@ -93,7 +93,7 @@ for _task in _goal_and_endeffector_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}' _env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper) kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
kwargs_dict_goal_and_endeffector_change_promp['name'] = _task kwargs_dict_goal_and_endeffector_change_promp['name'] = f'metaworld:{_task}'
register( register(
id=_env_id, id=_env_id,
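With the underlying task names now carrying the metaworld: prefix, the raw (non-MP) Metaworld tasks go through the same framework dispatch added to make_env_helpers; the registered ...ProMP ids are unchanged. A hedged sketch, with button-press-v2 used purely as an illustrative ML1 task name:

    import alr_envs

    raw_env = alr_envs.make("metaworld:button-press-v2", seed=1)
    obs = raw_env.reset()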

@ -27,7 +27,6 @@ DEFAULT_BB_DICT_ProMP = {
} }
} }
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6 kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075 kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075
@ -35,7 +34,7 @@ kwargs_dict_reacher_promp['basis_generator_kwargs']['num_basis'] = 6
kwargs_dict_reacher_promp['name'] = "Reacher-v2" kwargs_dict_reacher_promp['name'] = "Reacher-v2"
kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper) kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper)
register( register(
id='Reacher2dProMP-v2', id='ReacherProMP-v2',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_promp kwargs=kwargs_dict_reacher_promp
) )
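Note the registered id changes from Reacher2dProMP-v2 to ReacherProMP-v2, so downstream scripts need the new name:

    import alr_envs

    env = alr_envs.make("ReacherProMP-v2", seed=1)  # formerly "Reacher2dProMP-v2"
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())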

@ -1,65 +1 @@
import re
from typing import Union
import gym
from gym.envs.registration import register
from alr_envs.utils.make_env_helpers import make
def make_dmc(
id: str,
seed: int = 1,
visualize_reward: bool = True,
from_pixels: bool = False,
height: int = 84,
width: int = 84,
camera_id: int = 0,
frame_skip: int = 1,
episode_length: Union[None, int] = None,
environment_kwargs: dict = {},
time_limit: Union[None, float] = None,
channels_first: bool = True
):
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
# License: MIT
# Copyright (c) 2020 Denis Yarats
if not re.match(r"\w+-\w+", id):
raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
domain_name, task_name = id.split("-")
env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
if from_pixels:
assert not visualize_reward, 'Cannot use visualize reward when learning from pixels.'
# Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
episode_length = episode_length or (250 if domain_name == "manipulation" else 1000)
max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
if env_id not in gym.envs.registry.env_specs:
task_kwargs = {'random': seed}
# if seed is not None:
# task_kwargs['random'] = seed
if time_limit is not None:
task_kwargs['time_limit'] = time_limit
register(
id=env_id,
entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
kwargs=dict(
domain_name=domain_name,
task_name=task_name,
task_kwargs=task_kwargs,
environment_kwargs=environment_kwargs,
visualize_reward=visualize_reward,
from_pixels=from_pixels,
height=height,
width=width,
camera_id=camera_id,
frame_skip=frame_skip,
channels_first=channels_first,
),
max_episode_steps=max_episode_steps,
)
return gym.make(env_id)

@ -1,20 +1,41 @@
import warnings import re
import uuid
from collections.abc import MutableMapping
from copy import deepcopy from copy import deepcopy
from typing import Iterable, Type, Union, MutableMapping from math import ceil
from typing import Iterable, Type, Union
import gym import gym
import numpy as np import numpy as np
from gym.envs.registration import EnvSpec, registry
import alr_envs
try:
from dm_control import suite, manipulation, composer
from dm_control.rl import control
except ImportError:
pass
try:
import metaworld
except Exception:
# catch Exception due to Mujoco-py
pass
from gym.envs.registration import registry
from gym.envs.registration import register
from gym.wrappers import TimeAwareObservation from gym.wrappers import TimeAwareObservation
from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
from alr_envs.black_box.factory.controller_factory import get_controller
from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
from alr_envs.black_box.factory.controller_factory import get_controller
from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import nested_update from alr_envs.utils.utils import nested_update
ALL_FRAMEWORK_TYPES = ['meta', 'dmc', 'gym']
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
""" """
@ -70,57 +91,25 @@ def _make(env_id: str, seed, **kwargs):
# env_id.split(':') # env_id.split(':')
# if 'dmc' : # if 'dmc' :
try: if ':' in env_id:
# This access is required to allow for nested dict updates for BB envs split_id = env_id.split(':')
spec = registry.get(env_id) framework, env_id = split_id[-2:]
all_kwargs = deepcopy(spec.kwargs)
nested_update(all_kwargs, kwargs)
kwargs = all_kwargs
# Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
if env_id.startswith("dmc"):
kwargs.update({"seed": seed})
# Gym
env = gym.make(env_id, **kwargs)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
except (gym.error.Error, AttributeError):
# MetaWorld env
import metaworld
if env_id in metaworld.ML1.ENV_NAMES:
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
env._freeze_rand_vec = False
env.seeded_rand_vec = True
# Manually set spec, as metaworld environments are not registered via gym
env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
# env.seed(seed)
# env.action_space.seed(seed)
# env.observation_space.seed(seed)
# env.goal_space.seed(seed)
else: else:
# DMC framework = None
from alr_envs import make_dmc
env = make_dmc(env_id, seed=seed, **kwargs)
if not env.base_step_limit == env.spec.max_episode_steps: if framework == 'metaworld':
raise ValueError(f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym " # MetaWorld env
f"is different from the DMC environment specification of {env.base_step_limit} steps.") env = make_metaworld(env_id, seed=seed, **kwargs)
elif framework == 'dmc':
# DeepMind Controlp
env = make_dmc(env_id, seed=seed, **kwargs)
else:
env = make_gym(env_id, seed=seed, **kwargs)
return env return env
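A condensed sketch of the new prefix-based dispatch implemented above (Reacher-v2 stands in for any plain gym id):

    # "dmc:ball_in_cup-catch"     -> framework "dmc",       env id "ball_in_cup-catch"
    # "metaworld:button-press-v2" -> framework "metaworld", env id "button-press-v2"
    # "Reacher-v2"                -> no prefix, handled by make_gym
    env_id = "dmc:ball_in_cup-catch"
    framework, env_id = (env_id.split(':')[-2:] if ':' in env_id else (None, env_id))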
def _make_wrapped_env( def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs
):
""" """
Helper function for creating a wrapped gym environment using MPs. Helper function for creating a wrapped gym environment using MPs.
It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
@ -149,7 +138,7 @@ def _make_wrapped_env(
def make_bb( def make_bb(
env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping, env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1, controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1,
**kwargs): **kwargs):
""" """
This can also be used standalone for manually building a custom DMP environment. This can also be used standalone for manually building a custom DMP environment.
@ -167,7 +156,6 @@ def make_bb(
""" """
_verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None)) _verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories') learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories')
do_replanning = black_box_kwargs.get('replanning_schedule') do_replanning = black_box_kwargs.get('replanning_schedule')
@ -176,12 +164,16 @@ def make_bb(
if learn_sub_trajs or do_replanning: if learn_sub_trajs or do_replanning:
# add time_step observation when replanning # add time_step observation when replanning
kwargs['wrappers'].append(TimeAwareObservation) if not any(issubclass(w, TimeAwareObservation) for w in kwargs['wrappers']):
# Add as first wrapper in order to alter observation
kwargs['wrappers'].insert(0, TimeAwareObservation)
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item()) env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item())
if black_box_kwargs.get('duration') is None: if black_box_kwargs.get('duration') is None:
black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt black_box_kwargs['duration'] = env.spec.max_episode_steps * env.dt
if phase_kwargs.get('tau') is None: if phase_kwargs.get('tau') is None:
phase_kwargs['tau'] = black_box_kwargs['duration'] phase_kwargs['tau'] = black_box_kwargs['duration']
@ -194,7 +186,7 @@ def make_bb(
controller = get_controller(**controller_kwargs) controller = get_controller(**controller_kwargs)
traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs) traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller, bb_env = BlackBoxWrapper(env, trajectory_generator=traj_gen, tracking_controller=controller,
**black_box_kwargs) **black_box_kwargs)
return bb_env return bb_env
@ -249,6 +241,109 @@ def make_bb_env_helper(**kwargs):
basis_kwargs=basis_kwargs, **kwargs, seed=seed) basis_kwargs=basis_kwargs, **kwargs, seed=seed)
def make_dmc(
env_id: Union[str, composer.Environment, control.Environment],
seed: int = None,
visualize_reward: bool = True,
time_limit: Union[None, float] = None,
**kwargs
):
if not re.match(r"\w+-\w+", env_id):
raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
domain_name, task_name = env_id.split("-")
if task_name.endswith("_vision"):
# TODO
raise ValueError("The vision interface for manipulation tasks is currently not supported.")
if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL:
raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.')
# env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
gym_id = uuid.uuid4().hex + '-v1'
task_kwargs = {'random': seed}
if time_limit is not None:
task_kwargs['time_limit'] = time_limit
# create task
# Accessing private attribute because DMC does not expose time_limit or step_limit.
# Only the current time_step/time as well as the control_timestep can be accessed.
if domain_name == "manipulation":
env = manipulation.load(environment_name=task_name, seed=seed)
max_episode_steps = ceil(env._time_limit / env.control_timestep())
else:
env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
visualize_reward=visualize_reward, environment_kwargs=kwargs)
max_episode_steps = int(env._step_limit)
register(
id=gym_id,
entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
kwargs={'env': lambda: env},
max_episode_steps=max_episode_steps,
)
env = gym.make(gym_id)
env.seed(seed=seed)
return env
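make_dmc loads the dm_control task directly, derives max_episode_steps from the private step/time limit, and registers the instance under a throwaway uuid id so it can be created through gym.make. A usage sketch, assuming the function is importable from the module being edited here (path assumed) and that "cartpole-balance" is a valid domain-task pair:

from alr_envs.utils.make_env_helpers import make_dmc  # import path assumed

env = make_dmc("cartpole-balance", seed=1, visualize_reward=False)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
print(env.spec.max_episode_steps)  # derived from the dm_control step limit
env.close()

Through the public API the same environment should be reachable as make("dmc:cartpole-balance", seed=1), which is the route the updated step tests below take.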
def make_metaworld(env_id, seed, **kwargs):
if env_id not in metaworld.ML1.ENV_NAMES:
raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.')
_env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
_env._freeze_rand_vec = False
# New argument to use global seeding
_env.seeded_rand_vec = True
# Manually set spec, as metaworld environments are not registered via gym
# _env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
# _env = gym.wrappers.TimeLimit(_env, max_episode_steps=_env.max_path_length)
# _env.seed(seed)
# _env.action_space.seed(seed)
# _env.observation_space.seed(seed)
# _env.goal_space.seed(seed)
gym_id = uuid.uuid4().hex + '-v1'
register(
id=gym_id,
entry_point=lambda: _env,
max_episode_steps=_env.max_path_length,
)
# TODO: enable the env checker once metaworld fixes the mismatched dtypes of observations and the observation space # TODO: enable the env checker once metaworld fixes the mismatched dtypes of observations and the observation space
env = gym.make(gym_id, disable_env_checker=True)
env.seed(seed=seed)
return env
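make_metaworld mirrors this for MetaWorld: it instantiates the goal-observable variant, unfreezes the random vector so resets differ, registers the instance under a uuid id with max_path_length as the time limit, and disables the env checker because of the dtype issue noted in the TODO. A usage sketch via the prefixed public API, where the env name is only assumed to be in metaworld.ML1.ENV_NAMES:

from alr_envs import make

env = make("metaworld:button-press-v2", seed=1)  # routed to make_metaworld by _make
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()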
def make_gym(env_id, seed, **kwargs):
# This access is required to allow for nested dict updates for BB envs
spec = registry.get(env_id)
all_kwargs = deepcopy(spec.kwargs)
nested_update(all_kwargs, kwargs)
kwargs = all_kwargs
# Add seed to kwargs for the predefined gym+dmc hybrid environments and the registered black-box environments.
# if env_id.startswith("dmc") or any(s in env_id.lower() for s in ['promp', 'dmp', 'prodmp']):
all_bb_envs = sum(alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values(), [])
if env_id.startswith("dmc") or env_id in all_bb_envs:
kwargs.update({"seed": seed})
# Gym
env = gym.make(env_id, **kwargs)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
return env
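make_gym deep-merges the kwargs stored at registration time with the overrides passed in, and only forwards seed as a constructor kwarg for the dmc-prefixed hybrids and the registered black-box ids; everything else is seeded through env.seed() and the space seeds. A sketch of the merge behaviour, with a hypothetical recursive update standing in for the nested_update helper used above (the kwarg keys are chosen only for illustration):

def nested_update_sketch(base: dict, update: dict) -> dict:
    # Recursively merge `update` into `base`, overriding leaves while keeping
    # untouched nested entries, which is how registration kwargs survive overrides.
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            nested_update_sketch(base[key], value)
        else:
            base[key] = value
    return base

registered_kwargs = {'black_box_kwargs': {'duration': 2.0, 'learn_sub_trajectories': False}}
call_kwargs = {'black_box_kwargs': {'learn_sub_trajectories': True}}
print(nested_update_sketch(registered_kwargs, call_kwargs))
# {'black_box_kwargs': {'duration': 2.0, 'learn_sub_trajectories': True}}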
def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]): def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
""" """
When using DMC, check whether a manually specified time limit matches the trajectory duration the MP receives. When using DMC, check whether a manually specified time limit matches the trajectory duration the MP receives.

View File

@ -40,9 +40,9 @@ class TestMPEnvironments(unittest.TestCase):
for i in range(iterations): for i in range(iterations):
observations.append(obs) observations.append(obs)
ac = env.action_space.sample() actions = env.action_space.sample()
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(actions)
self._verify_observations(obs, env.observation_space, "step()") self._verify_observations(obs, env.observation_space, "step()")
self._verify_reward(reward) self._verify_reward(reward)
@ -55,13 +55,13 @@ class TestMPEnvironments(unittest.TestCase):
env.render("human") env.render("human")
if done: if done:
obs = env.reset() break
assert done, "Done flag is not True after max episode length." assert done, "Done flag is not True after end of episode."
observations.append(obs) observations.append(obs)
env.close() env.close()
del env del env
return np.array(observations), np.array(rewards), np.array(dones) return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
def _run_env_determinism(self, ids): def _run_env_determinism(self, ids):
seed = 0 seed = 0
@ -70,8 +70,9 @@ class TestMPEnvironments(unittest.TestCase):
traj1 = self._run_env(env_id, seed=seed) traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed) traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)): for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.allclose(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
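The determinism check now also records and compares the sampled actions, which requires the action space RNG to match across both rollouts. A compact sketch of the same pattern outside the unittest scaffolding; the env id is just an example of the prefixed naming, and any id from the environment lists used in these tests should work:

import numpy as np
from alr_envs import make

def rollout(env_id: str, seed: int, steps: int = 10):
    # Collect (obs, reward, done, action) tuples for a seeded environment,
    # stopping at the end of the episode like the updated tests above.
    env = make(env_id, seed=seed)
    env.action_space.seed(seed)  # seeded explicitly so the sketch is self-contained
    env.reset()
    trace = []
    for _ in range(steps):
        action = env.action_space.sample()
        obs, reward, done, _ = env.step(action)
        trace.append((obs, reward, done, action))
        if done:
            break
    env.close()
    return trace

for step1, step2 in zip(rollout("dmc:cartpole-balance", 0), rollout("dmc:cartpole-balance", 0)):
    (o1, r1, d1, a1), (o2, r2, d2, a2) = step1, step2
    assert np.array_equal(o1, o2) and np.array_equal(a1, a2) and r1 == r2 and d1 == d2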
@ -81,7 +82,7 @@ class TestMPEnvironments(unittest.TestCase):
f"not contained in observation space {observation_space}.") f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward): def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
def _verify_done(self, done): def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@ -113,12 +114,12 @@ class TestMPEnvironments(unittest.TestCase):
def test_dmc_environment_functionality(self): def test_dmc_environment_functionality(self):
"""Tests that environments runs without errors using random actions for DMC MP envs.""" """Tests that environments runs without errors using random actions for DMC MP envs."""
with self.subTest(msg="DMP"): with self.subTest(msg="DMP"):
for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']:
with self.subTest(msg=env_id): with self.subTest(msg=env_id):
self._run_env(env_id) self._run_env(env_id)
with self.subTest(msg="ProMP"): with self.subTest(msg="ProMP"):
for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']:
with self.subTest(msg=env_id): with self.subTest(msg=env_id):
self._run_env(env_id) self._run_env(env_id)
@ -151,9 +152,9 @@ class TestMPEnvironments(unittest.TestCase):
def test_dmc_environment_determinism(self): def test_dmc_environment_determinism(self):
"""Tests that identical seeds produce identical trajectories for DMC MP Envs.""" """Tests that identical seeds produce identical trajectories for DMC MP Envs."""
with self.subTest(msg="DMP"): with self.subTest(msg="DMP"):
self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"])
with self.subTest(msg="ProMP"): with self.subTest(msg="ProMP"):
self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])
def test_metaworld_environment_determinism(self): def test_metaworld_environment_determinism(self):
"""Tests that identical seeds produce identical trajectories for Metaworld MP Envs.""" """Tests that identical seeds produce identical trajectories for Metaworld MP Envs."""

View File

@ -7,8 +7,8 @@ from dm_control import suite, manipulation
from alr_envs import make from alr_envs import make
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
SEED = 1 SEED = 1
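With the renamed convention, the DMC step tests build their ids with the dmc: prefix so they run through the new dispatch instead of the removed gym/DMC hybrid path. For illustration, the comprehensions above turn dm_control entries into ids roughly like this; the concrete task names are assumptions about what suite.ALL_TASKS and manipulation.ALL contain:

suite_entry = ("cartpole", "balance")        # shape of an entry in suite.ALL_TASKS (assumed)
manipulation_entry = "reach_site_features"   # shape of an entry in manipulation.ALL (assumed)
print(f"dmc:{suite_entry[0]}-{suite_entry[1]}")   # dmc:cartpole-balance
print(f"dmc:manipulation-{manipulation_entry}")   # dmc:manipulation-reach_site_features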
@ -29,9 +29,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
Returns: Returns:
""" """
print(env_id)
env: gym.Env = make(env_id, seed=seed) env: gym.Env = make(env_id, seed=seed)
rewards = [] rewards = []
observations = [] observations = []
actions = []
dones = [] dones = []
obs = env.reset() obs = env.reset()
self._verify_observations(obs, env.observation_space, "reset()") self._verify_observations(obs, env.observation_space, "reset()")
@ -43,6 +45,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
observations.append(obs) observations.append(obs)
ac = env.action_space.sample() ac = env.action_space.sample()
actions.append(ac)
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
@ -57,13 +60,13 @@ class TestStepDMCEnvironments(unittest.TestCase):
env.render("human") env.render("human")
if done: if done:
obs = env.reset() break
assert done, "Done flag is not True after max episode length." assert done, "Done flag is not True after end of episode."
observations.append(obs) observations.append(obs)
env.close() env.close()
del env del env
return np.array(observations), np.array(rewards), np.array(dones) return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
def _verify_observations(self, obs, observation_space, obs_type="reset()"): def _verify_observations(self, obs, observation_space, obs_type="reset()"):
self.assertTrue(observation_space.contains(obs), self.assertTrue(observation_space.contains(obs),
@ -71,7 +74,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
f"not contained in observation space {observation_space}.") f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward): def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
def _verify_done(self, done): def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@ -91,8 +94,9 @@ class TestStepDMCEnvironments(unittest.TestCase):
traj1 = self._run_env(env_id, seed=seed) traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed) traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)): for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
@ -111,11 +115,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
traj1 = self._run_env(env_id, seed=seed) traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed) traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)): for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -6,7 +6,7 @@ import numpy as np
from alr_envs import make from alr_envs import make
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
SEED = 1 SEED = 1
@ -57,9 +57,9 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
env.render("human") env.render("human")
if done: if done:
obs = env.reset() break
assert done, "Done flag is not True after max episode length." assert done, "Done flag is not True after end of episode."
observations.append(obs) observations.append(obs)
env.close() env.close()
del env del env
@ -71,7 +71,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
f"not contained in observation space {observation_space}.") f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward): def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
def _verify_done(self, done): def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@ -94,7 +94,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")