naming convention and running tests
This commit is contained in:
parent
786da2290d
commit
ade83b5ae6
@ -113,7 +113,7 @@ print("OpenAI Gym MP tasks:")
|
||||
print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
|
||||
|
||||
print("Deepmind Control MP tasks:")
|
||||
print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS)
|
||||
print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
|
||||
|
||||
print("MetaWorld MP tasks:")
|
||||
print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS)
|
||||
|
@ -1,15 +1,14 @@
|
||||
from alr_envs import dmc, meta, open_ai
|
||||
from alr_envs.utils import make_dmc
|
||||
from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
|
||||
|
||||
# Convenience function for all MP environments
|
||||
from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
|
||||
from .dmc import ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS
|
||||
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
|
||||
from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS
|
||||
from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
|
||||
|
||||
ALL_MOTION_PRIMITIVE_ENVIRONMENTS = {
|
||||
key: value + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
|
||||
ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
|
||||
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
|
||||
ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
|
||||
ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key]
|
||||
for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()}
|
||||
|
@ -1,4 +1,7 @@
|
||||
from typing import Tuple, Union, Optional
|
||||
import os
|
||||
os.environ["MUJOCO_GL"] = "egl"
|
||||
|
||||
from typing import Tuple, Optional
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
@ -67,7 +70,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
||||
|
||||
def observation(self, observation):
|
||||
# return only the context-masked part of the observation if return_context_observation is set
|
||||
obs = observation[self.env.context_mask] if self.return_context_observation else observation
|
||||
mask = self.env.context_mask
|
||||
if self.is_time_aware:
|
||||
mask = np.append(mask, False)
|
||||
obs = observation[mask] if self.return_context_observation else observation
|
||||
# cast dtype because metaworld returns an incorrect dtype, which makes gym throw an error
|
||||
return obs.astype(self.observation_space.dtype)
|
||||
|
||||
|
@ -2,7 +2,7 @@ from copy import deepcopy
|
||||
|
||||
from . import manipulation, suite
|
||||
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
|
||||
from gym.envs.registration import register
|
||||
|
||||
@ -47,10 +47,9 @@ DEFAULT_BB_DICT_DMP = {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# DeepMind Control Suite (DMC)
|
||||
kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_bic_dmp['name'] = f"ball_in_cup-catch"
|
||||
kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch"
|
||||
kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper)
|
||||
kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1
|
||||
@ -58,304 +57,313 @@ kwargs_dict_bic_dmp['controller_kwargs']['p_gains'] = 50
|
||||
kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1
|
||||
register(
|
||||
id=f'dmc_ball_in_cup-catch_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"ball_in_cup-catch",
|
||||
"time_limit": 20,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.ball_in_cup.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 20,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "motor",
|
||||
"goal_scale": 0.1,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
"d_gains": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_bic_dmp
|
||||
# {
|
||||
# "name": f"ball_in_cup-catch",
|
||||
# "time_limit": 20,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.ball_in_cup.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 2,
|
||||
# "num_basis": 5,
|
||||
# "duration": 20,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "motor",
|
||||
# "goal_scale": 0.1,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 50,
|
||||
# "d_gains": 1
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
|
||||
|
||||
kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_bic_promp['name'] = f"ball_in_cup-catch"
|
||||
kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
|
||||
kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
|
||||
kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50
|
||||
kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1
|
||||
register(
|
||||
id=f'dmc_ball_in_cup-catch_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"ball_in_cup-catch",
|
||||
"time_limit": 20,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.ball_in_cup.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 20,
|
||||
"policy_type": "motor",
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
"d_gains": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bic_promp
|
||||
# {
|
||||
# "name": f"ball_in_cup-catch",
|
||||
# "time_limit": 20,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.ball_in_cup.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 2,
|
||||
# "num_basis": 5,
|
||||
# "duration": 20,
|
||||
# "policy_type": "motor",
|
||||
# "zero_start": True,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 50,
|
||||
# "d_gains": 1
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
|
||||
|
||||
kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_reacher_easy_dmp['name'] = f"reacher-easy"
|
||||
kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
|
||||
kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
# TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50
|
||||
kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1
|
||||
register(
|
||||
id=f'dmc_reacher-easy_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"reacher-easy",
|
||||
"time_limit": 20,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.reacher.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 20,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
"d_gains": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_bic_dmp
|
||||
# {
|
||||
# "name": f"reacher-easy",
|
||||
# "time_limit": 20,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.reacher.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 2,
|
||||
# "num_basis": 5,
|
||||
# "duration": 20,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 50,
|
||||
# "goal_scale": 0.1,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 50,
|
||||
# "d_gains": 1
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
|
||||
|
||||
kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_reacher_easy_promp['name'] = f"reacher-easy"
|
||||
kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
|
||||
kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50
|
||||
kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1
|
||||
kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
register(
|
||||
id=f'dmc_reacher-easy_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"reacher-easy",
|
||||
"time_limit": 20,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.reacher.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 20,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.2,
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
"d_gains": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_easy_promp
|
||||
# {
|
||||
# "name": f"reacher-easy",
|
||||
# "time_limit": 20,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.reacher.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 2,
|
||||
# "num_basis": 5,
|
||||
# "duration": 20,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 0.2,
|
||||
# "zero_start": True,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 50,
|
||||
# "d_gains": 1
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
|
||||
|
||||
kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_reacher_hard_dmp['name'] = f"reacher-hard"
|
||||
kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard"
|
||||
kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
# TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50
|
||||
kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1
|
||||
register(
|
||||
id=f'dmc_reacher-hard_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"reacher-hard",
|
||||
"time_limit": 20,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.reacher.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 20,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
"d_gains": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_reacher_hard_dmp
|
||||
# {
|
||||
# "name": f"reacher-hard",
|
||||
# "time_limit": 20,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.reacher.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 2,
|
||||
# "num_basis": 5,
|
||||
# "duration": 20,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 50,
|
||||
# "goal_scale": 0.1,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 50,
|
||||
# "d_gains": 1
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
|
||||
|
||||
kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_reacher_hard_promp['name'] = f"reacher-hard"
|
||||
kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard"
|
||||
kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50
|
||||
kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1
|
||||
kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
register(
|
||||
id=f'dmc_reacher-hard_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"reacher-hard",
|
||||
"time_limit": 20,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.reacher.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 20,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.2,
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
"d_gains": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_hard_promp
|
||||
# {
|
||||
# "name": f"reacher-hard",
|
||||
# "time_limit": 20,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.reacher.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 2,
|
||||
# "num_basis": 5,
|
||||
# "duration": 20,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 0.2,
|
||||
# "zero_start": True,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 50,
|
||||
# "d_gains": 1
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
|
||||
|
||||
_dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"]
|
||||
|
||||
for _task in _dmc_cartpole_tasks:
|
||||
_env_id = f'dmc_cartpole-{_task}_dmp-v0'
|
||||
kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_cartpole_dmp['name'] = f"cartpole-{_task}"
|
||||
kwargs_dict_cartpole_dmp['camera_id'] = 0
|
||||
kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}"
|
||||
kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper)
|
||||
kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_cartpole_dmp['trajectory_generator_kwargs'][
|
||||
'weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10
|
||||
kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"cartpole-{_task}",
|
||||
# "time_limit": 1,
|
||||
"camera_id": 0,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.cartpole.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 1,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 10,
|
||||
"d_gains": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_cartpole_dmp
|
||||
# {
|
||||
# "name": f"cartpole-{_task}",
|
||||
# # "time_limit": 1,
|
||||
# "camera_id": 0,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.cartpole.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 1,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 50,
|
||||
# "goal_scale": 0.1,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 10,
|
||||
# "d_gains": 10
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
_env_id = f'dmc_cartpole-{_task}_promp-v0'
|
||||
kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_cartpole_promp['name'] = f"cartpole-{_task}"
|
||||
kwargs_dict_cartpole_promp['camera_id'] = 0
|
||||
kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}"
|
||||
kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper)
|
||||
kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10
|
||||
kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10
|
||||
kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"cartpole-{_task}",
|
||||
# "time_limit": 1,
|
||||
"camera_id": 0,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.cartpole.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 1,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.2,
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 10,
|
||||
"d_gains": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole_promp
|
||||
# {
|
||||
# "name": f"cartpole-{_task}",
|
||||
# # "time_limit": 1,
|
||||
# "camera_id": 0,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.cartpole.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 1,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 0.2,
|
||||
# "zero_start": True,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 10,
|
||||
# "d_gains": 10
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_cartpole2poles_dmp['name'] = f"cartpole-two_poles"
|
||||
kwargs_dict_cartpole2poles_dmp['camera_id'] = 0
|
||||
kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles"
|
||||
kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
|
||||
kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
# TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10
|
||||
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
|
||||
_env_id = f'dmc_cartpole-two_poles_dmp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"cartpole-two_poles",
|
||||
# "time_limit": 1,
|
||||
"camera_id": 0,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.cartpole.TwoPolesMPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 1,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 10,
|
||||
"d_gains": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_cartpole2poles_dmp
|
||||
# {
|
||||
# "name": f"cartpole-two_poles",
|
||||
# # "time_limit": 1,
|
||||
# "camera_id": 0,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.cartpole.TwoPolesMPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 1,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 50,
|
||||
# "goal_scale": 0.1,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 10,
|
||||
# "d_gains": 10
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_cartpole2poles_promp['name'] = f"cartpole-two_poles"
|
||||
kwargs_dict_cartpole2poles_promp['camera_id'] = 0
|
||||
kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles"
|
||||
kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
|
||||
kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10
|
||||
kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10
|
||||
@ -363,70 +371,71 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
|
||||
_env_id = f'dmc_cartpole-two_poles_promp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"cartpole-two_poles",
|
||||
# "time_limit": 1,
|
||||
"camera_id": 0,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.cartpole.TwoPolesMPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 1,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.2,
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 10,
|
||||
"d_gains": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole2poles_promp
|
||||
# {
|
||||
# "name": f"cartpole-two_poles",
|
||||
# # "time_limit": 1,
|
||||
# "camera_id": 0,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.cartpole.TwoPolesMPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 1,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 0.2,
|
||||
# "zero_start": True,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 10,
|
||||
# "d_gains": 10
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_cartpole3poles_dmp['name'] = f"cartpole-three_poles"
|
||||
kwargs_dict_cartpole3poles_dmp['camera_id'] = 0
|
||||
kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles"
|
||||
kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
|
||||
kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
# TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10
|
||||
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
|
||||
_env_id = f'dmc_cartpole-three_poles_dmp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"cartpole-three_poles",
|
||||
# "time_limit": 1,
|
||||
"camera_id": 0,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.cartpole.ThreePolesMPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 1,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 10,
|
||||
"d_gains": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_cartpole3poles_dmp
|
||||
# {
|
||||
# "name": f"cartpole-three_poles",
|
||||
# # "time_limit": 1,
|
||||
# "camera_id": 0,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.cartpole.ThreePolesMPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 1,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 50,
|
||||
# "goal_scale": 0.1,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 10,
|
||||
# "d_gains": 10
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_cartpole3poles_promp['name'] = f"cartpole-three_poles"
|
||||
kwargs_dict_cartpole3poles_promp['camera_id'] = 0
|
||||
kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles"
|
||||
kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
|
||||
kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10
|
||||
kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10
|
||||
@ -434,81 +443,85 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
|
||||
_env_id = f'dmc_cartpole-three_poles_promp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"cartpole-three_poles",
|
||||
# "time_limit": 1,
|
||||
"camera_id": 0,
|
||||
"episode_length": 1000,
|
||||
"wrappers": [suite.cartpole.ThreePolesMPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 1,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.2,
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 10,
|
||||
"d_gains": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole3poles_promp
|
||||
# {
|
||||
# "name": f"cartpole-three_poles",
|
||||
# # "time_limit": 1,
|
||||
# "camera_id": 0,
|
||||
# "episode_length": 1000,
|
||||
# "wrappers": [suite.cartpole.ThreePolesMPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 1,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "policy_type": "motor",
|
||||
# "weights_scale": 0.2,
|
||||
# "zero_start": True,
|
||||
# "policy_kwargs": {
|
||||
# "p_gains": 10,
|
||||
# "d_gains": 10
|
||||
# }
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
# DeepMind Manipulation
|
||||
kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_mani_reach_site_features_dmp['name'] = f"manipulation-reach_site_features"
|
||||
kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features"
|
||||
kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper)
|
||||
kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
|
||||
# TODO: weight scale 50, but goal scale 0.1
|
||||
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
|
||||
register(
|
||||
id=f'dmc_manipulation-reach_site_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"manipulation-reach_site_features",
|
||||
# "time_limit": 1,
|
||||
"episode_length": 250,
|
||||
"wrappers": [manipulation.reach_site.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 9,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
"policy_type": "velocity",
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1,
|
||||
}
|
||||
}
|
||||
kwargs=kwargs_dict_mani_reach_site_features_dmp
|
||||
# {
|
||||
# "name": f"manipulation-reach_site_features",
|
||||
# # "time_limit": 1,
|
||||
# "episode_length": 250,
|
||||
# "wrappers": [manipulation.reach_site.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 9,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "learn_goal": True,
|
||||
# "alpha_phase": 2,
|
||||
# "bandwidth_factor": 2,
|
||||
# "policy_type": "velocity",
|
||||
# "weights_scale": 50,
|
||||
# "goal_scale": 0.1,
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
|
||||
|
||||
kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_mani_reach_site_features_promp['name'] = f"manipulation-reach_site_features"
|
||||
kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features"
|
||||
kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper)
|
||||
kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
|
||||
register(
|
||||
id=f'dmc_manipulation-reach_site_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"manipulation-reach_site_features",
|
||||
# "time_limit": 1,
|
||||
"episode_length": 250,
|
||||
"wrappers": [manipulation.reach_site.MPWrapper],
|
||||
"traj_gen_kwargs": {
|
||||
"num_dof": 9,
|
||||
"num_basis": 5,
|
||||
"duration": 10,
|
||||
"policy_type": "velocity",
|
||||
"weights_scale": 0.2,
|
||||
"zero_start": True,
|
||||
}
|
||||
}
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_mani_reach_site_features_promp
|
||||
# {
|
||||
# "name": f"manipulation-reach_site_features",
|
||||
# # "time_limit": 1,
|
||||
# "episode_length": 250,
|
||||
# "wrappers": [manipulation.reach_site.MPWrapper],
|
||||
# "traj_gen_kwargs": {
|
||||
# "num_dof": 9,
|
||||
# "num_basis": 5,
|
||||
# "duration": 10,
|
||||
# "policy_type": "velocity",
|
||||
# "weights_scale": 0.2,
|
||||
# "zero_start": True,
|
||||
# }
|
||||
# }
|
||||
)
|
||||
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
|
||||
|
@ -2,17 +2,22 @@
|
||||
# License: MIT
|
||||
# Copyright (c) 2020 Denis Yarats
|
||||
import collections
|
||||
from typing import Any, Dict, Tuple
|
||||
from collections.abc import MutableMapping
|
||||
from typing import Any, Dict, Tuple, Optional, Union, Callable
|
||||
|
||||
from dm_control import composer
|
||||
import gym
|
||||
import numpy as np
|
||||
from dm_control import manipulation, suite
|
||||
from dm_control.rl import control
|
||||
from dm_env import specs
|
||||
from gym import core, spaces
|
||||
from gym import spaces
|
||||
from gym.core import ObsType
|
||||
|
||||
|
||||
def _spec_to_box(spec):
|
||||
def extract_min_max(s):
|
||||
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
|
||||
assert s.dtype == np.float64 or s.dtype == np.float32, \
|
||||
f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
|
||||
dim = int(np.prod(s.shape))
|
||||
if type(s) == specs.Array:
|
||||
bound = np.inf * np.ones(dim, dtype=s.dtype)
|
||||
@ -32,7 +37,7 @@ def _spec_to_box(spec):
|
||||
return spaces.Box(low, high, dtype=s.dtype)
|
||||
|
||||
|
||||
def _flatten_obs(obs: collections.MutableMapping):
|
||||
def _flatten_obs(obs: MutableMapping):
|
||||
"""
|
||||
Flattens an observation of type MutableMapping, e.g. a dict to a 1D array.
|
||||
Args:
|
||||
@ -42,7 +47,7 @@ def _flatten_obs(obs: collections.MutableMapping):
|
||||
|
||||
"""
|
||||
|
||||
if not isinstance(obs, collections.MutableMapping):
|
||||
if not isinstance(obs, MutableMapping):
|
||||
raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
|
||||
|
||||
# Keep key order consistent for non OrderedDicts
|
||||
@ -52,47 +57,19 @@ def _flatten_obs(obs: collections.MutableMapping):
|
||||
return np.concatenate(obs_vals)
|
||||
|
||||
|
||||
class DMCWrapper(core.Env):
|
||||
def __init__(
|
||||
self,
|
||||
domain_name: str,
|
||||
task_name: str,
|
||||
task_kwargs: dict = {},
|
||||
visualize_reward: bool = True,
|
||||
from_pixels: bool = False,
|
||||
height: int = 84,
|
||||
width: int = 84,
|
||||
camera_id: int = 0,
|
||||
frame_skip: int = 1,
|
||||
environment_kwargs: dict = None,
|
||||
channels_first: bool = True
|
||||
class DMCWrapper(gym.Env):
|
||||
def __init__(self,
|
||||
env: Callable[[], Union[composer.Environment, control.Environment]],
|
||||
):
|
||||
assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.'
|
||||
self._from_pixels = from_pixels
|
||||
self._height = height
|
||||
self._width = width
|
||||
self._camera_id = camera_id
|
||||
self._frame_skip = frame_skip
|
||||
self._channels_first = channels_first
|
||||
|
||||
# create task
|
||||
if domain_name == "manipulation":
|
||||
assert not from_pixels and not task_name.endswith("_vision"), \
|
||||
"TODO: Vision interface for manipulation is different to suite and needs to be implemented"
|
||||
self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random'])
|
||||
else:
|
||||
self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
|
||||
visualize_reward=visualize_reward, environment_kwargs=environment_kwargs)
|
||||
# TODO: Currently this is required to be a function because dmc does not allow copying composer environments
|
||||
self._env = env()
|
||||
|
||||
# action and observation space
|
||||
self._action_space = _spec_to_box([self._env.action_spec()])
|
||||
self._observation_space = _spec_to_box(self._env.observation_spec().values())
|
||||
|
||||
self._last_state = None
|
||||
self.viewer = None
|
||||
|
||||
# set seed
|
||||
self.seed(seed=task_kwargs.get('random', 1))
|
||||
self._window = None
|
||||
|
||||
def __getattr__(self, item):
|
||||
"""Propagate only non-existent properties to wrapped env."""
|
||||
@ -103,16 +80,6 @@ class DMCWrapper(core.Env):
|
||||
return getattr(self._env, item)
|
||||
|
||||
def _get_obs(self, time_step):
|
||||
if self._from_pixels:
|
||||
obs = self.render(
|
||||
mode="rgb_array",
|
||||
height=self._height,
|
||||
width=self._width,
|
||||
camera_id=self._camera_id
|
||||
)
|
||||
if self._channels_first:
|
||||
obs = obs.transpose(2, 0, 1).copy()
|
||||
else:
|
||||
obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
|
||||
return obs
|
||||
|
||||
@ -126,20 +93,7 @@ class DMCWrapper(core.Env):
|
||||
|
||||
@property
|
||||
def dt(self):
|
||||
return self._env.control_timestep() * self._frame_skip
|
||||
|
||||
@property
|
||||
def base_step_limit(self):
|
||||
"""
|
||||
Returns: max_episode_steps of the underlying DMC env
|
||||
|
||||
"""
|
||||
# Accessing private attribute because DMC does not expose time_limit or step_limit.
|
||||
# Only the current time_step/time as well as the control_timestep can be accessed.
|
||||
try:
|
||||
return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip
|
||||
except AttributeError as e:
|
||||
return self._env._time_limit / self.dt
|
||||
return self._env.control_timestep()
|
||||
|
||||
def seed(self, seed=None):
|
||||
self._action_space.seed(seed)
|
||||
@ -147,56 +101,71 @@ class DMCWrapper(core.Env):
|
||||
|
||||
def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
|
||||
assert self._action_space.contains(action)
|
||||
reward = 0
|
||||
extra = {'internal_state': self._env.physics.get_state().copy()}
|
||||
|
||||
for _ in range(self._frame_skip):
|
||||
time_step = self._env.step(action)
|
||||
reward += time_step.reward or 0.
|
||||
reward = time_step.reward or 0.
|
||||
done = time_step.last()
|
||||
if done:
|
||||
break
|
||||
|
||||
self._last_state = _flatten_obs(time_step.observation)
|
||||
obs = self._get_obs(time_step)
|
||||
extra['discount'] = time_step.discount
|
||||
|
||||
return obs, reward, done, extra
|
||||
|
||||
def reset(self) -> np.ndarray:
|
||||
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
|
||||
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
|
||||
time_step = self._env.reset()
|
||||
self._last_state = _flatten_obs(time_step.observation)
|
||||
obs = self._get_obs(time_step)
|
||||
return obs
|
||||
|
||||
def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
|
||||
if self._last_state is None:
|
||||
raise ValueError('Environment not ready to render. Call reset() first.')
|
||||
|
||||
camera_id = camera_id or self._camera_id
|
||||
def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False,
|
||||
segmentation=False, scene_option=None, render_flag_overrides=None):
|
||||
|
||||
# assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
|
||||
if mode == "rgb_array":
|
||||
height = height or self._height
|
||||
width = width or self._width
|
||||
return self._env.physics.render(height=height, width=width, camera_id=camera_id)
|
||||
return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays,
|
||||
depth=depth, segmentation=segmentation, scene_option=scene_option,
|
||||
render_flag_overrides=render_flag_overrides)
|
||||
|
||||
elif mode == 'human':
|
||||
if self.viewer is None:
|
||||
# pylint: disable=import-outside-toplevel
|
||||
# pylint: disable=g-import-not-at-top
|
||||
from gym.envs.classic_control import rendering
|
||||
self.viewer = rendering.SimpleImageViewer()
|
||||
# Render max available buffer size. Larger is only possible by altering the XML.
|
||||
img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
|
||||
width=self._env.physics.model.vis.global_.offwidth,
|
||||
camera_id=camera_id)
|
||||
self.viewer.imshow(img)
|
||||
return self.viewer.isopen
|
||||
camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation,
|
||||
scene_option=scene_option, render_flag_overrides=render_flag_overrides)
|
||||
|
||||
if depth:
|
||||
img = np.dstack([img.astype(np.uint8)] * 3)
|
||||
|
||||
if mode == 'human':
|
||||
try:
|
||||
import cv2
|
||||
if self._window is None:
|
||||
self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE)
|
||||
|
||||
cv2.imshow(self.id, img[..., ::-1]) # Image in BGR
|
||||
cv2.waitKey(1)
|
||||
except ImportError:
|
||||
import pygame
|
||||
img = img.transpose((1, 0, 2))
|
||||
if self._window is None:
|
||||
pygame.init()
|
||||
pygame.display.init()
|
||||
self._window = pygame.display.set_mode(img.shape[:2])
|
||||
|
||||
self._window.blit(pygame.surfarray.make_surface(img), (0, 0))
|
||||
pygame.event.pump()
|
||||
pygame.display.flip()
|
||||
|
||||
def close(self):
|
||||
super().close()
|
||||
if self.viewer is not None and self.viewer.isopen:
|
||||
self.viewer.close()
|
||||
if self._window is not None:
|
||||
try:
|
||||
import cv2
|
||||
cv2.destroyWindow(self.id)
|
||||
except ImportError:
|
||||
import pygame
|
||||
|
||||
pygame.display.quit()
|
||||
pygame.quit()
|
||||
|
||||
@property
|
||||
def reward_range(self) -> Tuple[float, float]:
|
||||
@ -204,3 +173,8 @@ class DMCWrapper(core.Env):
|
||||
if isinstance(reward_spec, specs.BoundedArray):
|
||||
return reward_spec.minimum, reward_spec.maximum
|
||||
return -float('inf'), float('inf')
|
||||
|
||||
@property
def metadata(self):
    """
    Rendering metadata in the gym format.

    Returns:
        dict with the supported render modes and the video frame rate, where the frame rate is the rounded
        inverse of the wrapped environment's control timestep.

    """
    frames_per_second = round(1.0 / self._env.control_timestep())
    supported_modes = ['human', 'rgb_array']
    return {'render.modes': supported_modes, 'video.frames_per_second': frames_per_second}
|
||||
|
@ -1,3 +1,5 @@
|
||||
import numpy as np
|
||||
|
||||
import alr_envs
|
||||
|
||||
|
||||
@ -59,7 +61,8 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
|
||||
"""
|
||||
# Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
|
||||
# E.g. here for way too many basis functions
|
||||
env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
|
||||
# env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
|
||||
env = alr_envs.make(env_name, seed)
|
||||
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
|
||||
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
|
||||
|
||||
@ -72,15 +75,16 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
|
||||
|
||||
# number of samples/full trajectories (multiple environment steps)
|
||||
for i in range(iterations):
|
||||
ac = env.action_space.sample() * 1000
|
||||
ac = env.action_space.sample()
|
||||
obs, reward, done, info = env.step(ac)
|
||||
rewards += reward
|
||||
|
||||
if done:
|
||||
print(rewards)
|
||||
print(i, rewards)
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
print(obs)
|
||||
|
||||
return obs
|
||||
|
||||
|
||||
def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
@ -139,7 +143,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
render = True
|
||||
render = False
|
||||
# # DMP
|
||||
# example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
|
||||
#
|
||||
@ -150,7 +154,7 @@ if __name__ == '__main__':
|
||||
# example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
|
||||
|
||||
# Altered basis functions
|
||||
example_custom_mp("HopperJumpSparseProMP-v0", seed=10, iterations=10, render=render)
|
||||
obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
|
||||
|
||||
# Custom MP
|
||||
# example_fully_custom_mp(seed=10, iterations=1, render=render)
|
||||
|
@ -36,7 +36,7 @@ for _task in _goal_change_envs:
|
||||
_env_id = f'{name}ProMP-{task_id_split[-1]}'
|
||||
kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper)
|
||||
kwargs_dict_goal_change_promp['name'] = _task
|
||||
kwargs_dict_goal_change_promp['name'] = f'metaworld:{_task}'
|
||||
|
||||
register(
|
||||
id=_env_id,
|
||||
@ -52,7 +52,7 @@ for _task in _object_change_envs:
|
||||
_env_id = f'{name}ProMP-{task_id_split[-1]}'
|
||||
kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper)
|
||||
kwargs_dict_object_change_promp['name'] = _task
|
||||
kwargs_dict_object_change_promp['name'] = f'metaworld:{_task}'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
@ -77,7 +77,7 @@ for _task in _goal_and_object_change_envs:
|
||||
_env_id = f'{name}ProMP-{task_id_split[-1]}'
|
||||
kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper)
|
||||
kwargs_dict_goal_and_object_change_promp['name'] = _task
|
||||
kwargs_dict_goal_and_object_change_promp['name'] = f'metaworld:{_task}'
|
||||
|
||||
register(
|
||||
id=_env_id,
|
||||
@ -93,7 +93,7 @@ for _task in _goal_and_endeffector_change_envs:
|
||||
_env_id = f'{name}ProMP-{task_id_split[-1]}'
|
||||
kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
|
||||
kwargs_dict_goal_and_endeffector_change_promp['name'] = _task
|
||||
kwargs_dict_goal_and_endeffector_change_promp['name'] = f'metaworld:{_task}'
|
||||
|
||||
register(
|
||||
id=_env_id,
|
||||
|
@ -27,7 +27,6 @@ DEFAULT_BB_DICT_ProMP = {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6
|
||||
kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075
|
||||
@ -35,7 +34,7 @@ kwargs_dict_reacher_promp['basis_generator_kwargs']['num_basis'] = 6
|
||||
kwargs_dict_reacher_promp['name'] = "Reacher-v2"
|
||||
kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper)
|
||||
register(
|
||||
id='Reacher2dProMP-v2',
|
||||
id='ReacherProMP-v2',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_promp
|
||||
)
|
||||
|
@ -1,65 +1 @@
|
||||
import re
|
||||
from typing import Union
|
||||
|
||||
import gym
|
||||
from gym.envs.registration import register
|
||||
|
||||
from alr_envs.utils.make_env_helpers import make
|
||||
|
||||
|
||||
def make_dmc(
        id: str,
        seed: int = 1,
        visualize_reward: bool = True,
        from_pixels: bool = False,
        height: int = 84,
        width: int = 84,
        camera_id: int = 0,
        frame_skip: int = 1,
        episode_length: Union[None, int] = None,
        environment_kwargs: Union[None, dict] = None,
        time_limit: Union[None, float] = None,
        channels_first: bool = True
):
    """
    Register (if necessary) and create a gym environment wrapping a DeepMind Control task.

    Args:
        id: task identifier of the form 'domain_name-task_name'
        seed: seed passed to the task as task_kwargs['random']; also part of the registered gym id
        visualize_reward: forwarded to the wrapper; must be False when `from_pixels` is set
        from_pixels: forwarded to the wrapper
        height: forwarded to the wrapper
        width: forwarded to the wrapper
        camera_id: forwarded to the wrapper
        frame_skip: forwarded to the wrapper and used to convert `episode_length` to gym steps
        episode_length: number of base steps per episode; defaults to 250 for the 'manipulation' domain
            and 1000 otherwise
        environment_kwargs: forwarded to the wrapper; None is treated as an empty dict
        time_limit: if given, forwarded as task_kwargs['time_limit']
        channels_first: forwarded to the wrapper

    Returns:
        the environment created by `gym.make` for the registered id

    Raises:
        ValueError: if `id` is not of the form 'domain_name-task_name'

    """
    # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
    # License: MIT
    # Copyright (c) 2020 Denis Yarats

    # fullmatch rejects ids with more than one '-' up front, so the unpacking below cannot fail
    if not re.fullmatch(r"\w+-\w+", id):
        raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
    domain_name, task_name = id.split("-")

    # A fresh dict per call; a shared `{}` default would end up inside the registered kwargs of every env
    if environment_kwargs is None:
        environment_kwargs = {}

    env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'

    if from_pixels:
        assert not visualize_reward, 'Cannot use visualize reward when learning from pixels.'

    # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
    episode_length = episode_length or (250 if domain_name == "manipulation" else 1000)

    # Round up so that a partially filled last frame-skip window still counts as a step
    max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
    if env_id not in gym.envs.registry.env_specs:
        task_kwargs = {'random': seed}
        if time_limit is not None:
            task_kwargs['time_limit'] = time_limit
        register(
            id=env_id,
            entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
            kwargs=dict(
                domain_name=domain_name,
                task_name=task_name,
                task_kwargs=task_kwargs,
                environment_kwargs=environment_kwargs,
                visualize_reward=visualize_reward,
                from_pixels=from_pixels,
                height=height,
                width=width,
                camera_id=camera_id,
                frame_skip=frame_skip,
                channels_first=channels_first,
            ),
            max_episode_steps=max_episode_steps,
        )
    return gym.make(env_id)
|
||||
|
@ -1,20 +1,41 @@
|
||||
import warnings
|
||||
import re
|
||||
import uuid
|
||||
from collections.abc import MutableMapping
|
||||
from copy import deepcopy
|
||||
from typing import Iterable, Type, Union, MutableMapping
|
||||
from math import ceil
|
||||
from typing import Iterable, Type, Union
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
from gym.envs.registration import EnvSpec, registry
|
||||
|
||||
import alr_envs
|
||||
|
||||
try:
|
||||
from dm_control import suite, manipulation, composer
|
||||
from dm_control.rl import control
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import metaworld
|
||||
except Exception:
|
||||
# catch Exception due to Mujoco-py
|
||||
pass
|
||||
|
||||
from gym.envs.registration import registry
|
||||
from gym.envs.registration import register
|
||||
from gym.wrappers import TimeAwareObservation
|
||||
|
||||
from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
|
||||
from alr_envs.black_box.factory.controller_factory import get_controller
|
||||
from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
|
||||
from alr_envs.black_box.factory.controller_factory import get_controller
|
||||
from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
|
||||
from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from alr_envs.utils.utils import nested_update
|
||||
|
||||
ALL_FRAMEWORK_TYPES = ['meta', 'dmc', 'gym']
|
||||
|
||||
|
||||
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
|
||||
"""
|
||||
@ -70,57 +91,25 @@ def _make(env_id: str, seed, **kwargs):
|
||||
# env_id.split(':')
|
||||
# if 'dmc' :
|
||||
|
||||
try:
|
||||
# This access is required to allow for nested dict updates for BB envs
|
||||
spec = registry.get(env_id)
|
||||
all_kwargs = deepcopy(spec.kwargs)
|
||||
nested_update(all_kwargs, kwargs)
|
||||
kwargs = all_kwargs
|
||||
|
||||
# Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
|
||||
if env_id.startswith("dmc"):
|
||||
kwargs.update({"seed": seed})
|
||||
|
||||
# Gym
|
||||
env = gym.make(env_id, **kwargs)
|
||||
env.seed(seed)
|
||||
env.action_space.seed(seed)
|
||||
env.observation_space.seed(seed)
|
||||
except (gym.error.Error, AttributeError):
|
||||
|
||||
# MetaWorld env
|
||||
import metaworld
|
||||
if env_id in metaworld.ML1.ENV_NAMES:
|
||||
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
|
||||
|
||||
# setting this avoids generating the same initialization after each reset
|
||||
env._freeze_rand_vec = False
|
||||
env.seeded_rand_vec = True
|
||||
|
||||
# Manually set spec, as metaworld environments are not registered via gym
|
||||
env.unwrapped.spec = EnvSpec(env_id)
|
||||
# Set Timelimit based on the maximum allowed path length of the environment
|
||||
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
|
||||
# env.seed(seed)
|
||||
# env.action_space.seed(seed)
|
||||
# env.observation_space.seed(seed)
|
||||
# env.goal_space.seed(seed)
|
||||
|
||||
if ':' in env_id:
|
||||
split_id = env_id.split(':')
|
||||
framework, env_id = split_id[-2:]
|
||||
else:
|
||||
# DMC
|
||||
from alr_envs import make_dmc
|
||||
env = make_dmc(env_id, seed=seed, **kwargs)
|
||||
framework = None
|
||||
|
||||
if not env.base_step_limit == env.spec.max_episode_steps:
|
||||
raise ValueError(f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym "
|
||||
f"is different from the DMC environment specification of {env.base_step_limit} steps.")
|
||||
if framework == 'metaworld':
|
||||
# MetaWorld env
|
||||
env = make_metaworld(env_id, seed=seed, **kwargs)
|
||||
elif framework == 'dmc':
|
||||
# DeepMind Control
|
||||
env = make_dmc(env_id, seed=seed, **kwargs)
|
||||
else:
|
||||
env = make_gym(env_id, seed=seed, **kwargs)
|
||||
|
||||
return env
|
||||
|
||||
|
||||
def _make_wrapped_env(
|
||||
env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs
|
||||
):
|
||||
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
|
||||
"""
|
||||
Helper function for creating a wrapped gym environment using MPs.
|
||||
It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
|
||||
@ -149,7 +138,7 @@ def _make_wrapped_env(
|
||||
|
||||
def make_bb(
|
||||
env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
|
||||
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
|
||||
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1,
|
||||
**kwargs):
|
||||
"""
|
||||
This can also be used standalone for manually building a custom DMP environment.
|
||||
@ -167,7 +156,6 @@ def make_bb(
|
||||
|
||||
"""
|
||||
_verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
|
||||
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
|
||||
|
||||
learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories')
|
||||
do_replanning = black_box_kwargs.get('replanning_schedule')
|
||||
@ -176,12 +164,16 @@ def make_bb(
|
||||
|
||||
if learn_sub_trajs or do_replanning:
|
||||
# add time_step observation when replanning
|
||||
kwargs['wrappers'].append(TimeAwareObservation)
|
||||
if not any(issubclass(w, TimeAwareObservation) for w in kwargs['wrappers']):
|
||||
# Add as first wrapper in order to alter observation
|
||||
kwargs['wrappers'].insert(0, TimeAwareObservation)
|
||||
|
||||
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item())
|
||||
env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
|
||||
|
||||
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item())
|
||||
|
||||
if black_box_kwargs.get('duration') is None:
|
||||
black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
|
||||
black_box_kwargs['duration'] = env.spec.max_episode_steps * env.dt
|
||||
if phase_kwargs.get('tau') is None:
|
||||
phase_kwargs['tau'] = black_box_kwargs['duration']
|
||||
|
||||
@ -194,7 +186,7 @@ def make_bb(
|
||||
controller = get_controller(**controller_kwargs)
|
||||
traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
|
||||
|
||||
bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller,
|
||||
bb_env = BlackBoxWrapper(env, trajectory_generator=traj_gen, tracking_controller=controller,
|
||||
**black_box_kwargs)
|
||||
|
||||
return bb_env
|
||||
@ -249,6 +241,109 @@ def make_bb_env_helper(**kwargs):
|
||||
basis_kwargs=basis_kwargs, **kwargs, seed=seed)
|
||||
|
||||
|
||||
def make_dmc(
        env_id: "Union[str, composer.Environment, control.Environment]",
        seed: Union[None, int] = None,
        visualize_reward: bool = True,
        time_limit: Union[None, float] = None,
        **kwargs
):
    """
    Create a DeepMind Control environment and expose it through the gym API.

    The environment is loaded from dm_control, registered with gym under a freshly generated unique id and then
    instantiated via `gym.make`, with `max_episode_steps` taken from the loaded environment.

    Args:
        env_id: identifier of the form 'domain_name-task_name'; the domain 'manipulation' selects the
            manipulation tasks, everything else is looked up in the control suite
        seed: seed used for loading the task and for seeding the returned environment
        visualize_reward: forwarded to `suite.load`; not used for manipulation tasks
        time_limit: if given, forwarded to `suite.load` as task_kwargs['time_limit']; not used for manipulation
            tasks
        **kwargs: forwarded to `suite.load` as `environment_kwargs`; not used for manipulation tasks

    Returns:
        the seeded environment created by `gym.make`

    Raises:
        ValueError: if `env_id` is malformed, refers to a '_vision' task, or names an unknown domain/task
            combination

    """
    # NOTE: the dm_control types in the signature are quoted on purpose. dm_control is imported optionally in this
    # module, and an eagerly evaluated annotation would raise a NameError on import when it is not installed.

    # fullmatch rejects ids with more than one '-' up front, so the unpacking below cannot fail
    if not re.fullmatch(r"\w+-\w+", env_id):
        raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
    domain_name, task_name = env_id.split("-")

    if task_name.endswith("_vision"):
        # TODO
        raise ValueError("The vision interface for manipulation tasks is currently not supported.")

    if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL:
        raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.')

    # Unique id per call, every call registers its own gym spec
    gym_id = uuid.uuid4().hex + '-v1'

    task_kwargs = {'random': seed}
    if time_limit is not None:
        task_kwargs['time_limit'] = time_limit

    # create task
    # Accessing private attribute because DMC does not expose time_limit or step_limit.
    # Only the current time_step/time as well as the control_timestep can be accessed.
    if domain_name == "manipulation":
        dmc_env = manipulation.load(environment_name=task_name, seed=seed)
        max_episode_steps = ceil(dmc_env._time_limit / dmc_env.control_timestep())
    else:
        dmc_env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
                             visualize_reward=visualize_reward, environment_kwargs=kwargs)
        max_episode_steps = int(dmc_env._step_limit)

    register(
        id=gym_id,
        entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
        # The factory closes over `dmc_env`, which is never rebound, so it always yields the raw dm_control env
        kwargs={'env': lambda: dmc_env},
        max_episode_steps=max_episode_steps,
    )

    env = gym.make(gym_id)
    env.seed(seed=seed)
    return env
|
||||
|
||||
|
||||
def make_metaworld(env_id, seed, **kwargs):
    """
    Create a goal-observable MetaWorld ML1 environment and expose it through `gym.make`.

    Args:
        env_id: name of the task, has to be one of `metaworld.ML1.ENV_NAMES`
        seed: seed passed to the MetaWorld constructor and used for seeding the returned environment
        **kwargs: additional keyword arguments for the MetaWorld environment constructor

    Returns:
        the seeded environment created by `gym.make`, registered under a freshly generated unique id with
        `max_episode_steps` set to the environment's `max_path_length`

    Raises:
        ValueError: if `env_id` is not a MetaWorld ML1 task

    """
    if env_id not in metaworld.ML1.ENV_NAMES:
        raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.')

    env_constructor = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[f"{env_id}-goal-observable"]
    base_env = env_constructor(seed=seed, **kwargs)

    # Without this, every reset would generate the same initialization
    base_env._freeze_rand_vec = False
    # Use the global seeding instead
    base_env.seeded_rand_vec = True

    # MetaWorld environments are not registered with gym, hence register the created instance under a unique id
    unique_id = uuid.uuid4().hex + '-v1'
    register(
        id=unique_id,
        entry_point=lambda: base_env,
        max_episode_steps=base_env.max_path_length,
    )

    # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld
    wrapped_env = gym.make(unique_id, disable_env_checker=True)
    wrapped_env.seed(seed=seed)
    return wrapped_env
|
||||
|
||||
|
||||
def make_gym(env_id, seed, **kwargs):
    """
    Create an environment that is registered with gym, merging the given kwargs into the registered ones.

    Args:
        env_id: id of the registered gym environment
        seed: seed for the environment as well as its action and observation space
        **kwargs: overrides for the registered kwargs, applied as a nested update

    Returns:
        the seeded gym environment

    """
    # Start from a copy of the registered kwargs, this is what allows nested dict updates for BB envs
    merged_kwargs = deepcopy(registry.get(env_id).kwargs)
    nested_update(merged_kwargs, kwargs)

    # Predefined gym+dmc hybrid and black box environments take the seed as a constructor argument
    is_bb_env = any(env_id in bb_ids for bb_ids in alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
    if env_id.startswith("dmc") or is_bb_env:
        merged_kwargs["seed"] = seed

    # Gym
    gym_env = gym.make(env_id, **merged_kwargs)
    gym_env.seed(seed)
    gym_env.action_space.seed(seed)
    gym_env.observation_space.seed(seed)
    return gym_env
|
||||
|
||||
|
||||
def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
|
||||
"""
|
||||
When using DMC check if a manually specified time limit matches the trajectory duration the MP receives.
|
||||
|
@ -40,9 +40,9 @@ class TestMPEnvironments(unittest.TestCase):
|
||||
for i in range(iterations):
|
||||
observations.append(obs)
|
||||
|
||||
ac = env.action_space.sample()
|
||||
actions = env.action_space.sample()
|
||||
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
|
||||
obs, reward, done, info = env.step(ac)
|
||||
obs, reward, done, info = env.step(actions)
|
||||
|
||||
self._verify_observations(obs, env.observation_space, "step()")
|
||||
self._verify_reward(reward)
|
||||
@ -55,13 +55,13 @@ class TestMPEnvironments(unittest.TestCase):
|
||||
env.render("human")
|
||||
|
||||
if done:
|
||||
obs = env.reset()
|
||||
break
|
||||
|
||||
assert done, "Done flag is not True after max episode length."
|
||||
assert done, "Done flag is not True after end of episode."
|
||||
observations.append(obs)
|
||||
env.close()
|
||||
del env
|
||||
return np.array(observations), np.array(rewards), np.array(dones)
|
||||
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
|
||||
|
||||
def _run_env_determinism(self, ids):
|
||||
seed = 0
|
||||
@ -70,8 +70,9 @@ class TestMPEnvironments(unittest.TestCase):
|
||||
traj1 = self._run_env(env_id, seed=seed)
|
||||
traj2 = self._run_env(env_id, seed=seed)
|
||||
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
|
||||
self.assertTrue(np.allclose(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
|
||||
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
|
||||
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
|
||||
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
|
||||
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||
|
||||
@ -81,7 +82,7 @@ class TestMPEnvironments(unittest.TestCase):
|
||||
f"not contained in observation space {observation_space}.")
|
||||
|
||||
def _verify_reward(self, reward):
|
||||
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
|
||||
self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
|
||||
|
||||
def _verify_done(self, done):
|
||||
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
|
||||
@ -113,12 +114,12 @@ class TestMPEnvironments(unittest.TestCase):
|
||||
def test_dmc_environment_functionality(self):
|
||||
"""Tests that environments runs without errors using random actions for DMC MP envs."""
|
||||
with self.subTest(msg="DMP"):
|
||||
for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']:
|
||||
for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']:
|
||||
with self.subTest(msg=env_id):
|
||||
self._run_env(env_id)
|
||||
|
||||
with self.subTest(msg="ProMP"):
|
||||
for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']:
|
||||
for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']:
|
||||
with self.subTest(msg=env_id):
|
||||
self._run_env(env_id)
|
||||
|
||||
@ -151,9 +152,9 @@ class TestMPEnvironments(unittest.TestCase):
|
||||
def test_dmc_environment_determinism(self):
|
||||
"""Tests that identical seeds produce identical trajectories for DMC MP Envs."""
|
||||
with self.subTest(msg="DMP"):
|
||||
self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"])
|
||||
self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"])
|
||||
with self.subTest(msg="ProMP"):
|
||||
self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"])
|
||||
self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])
|
||||
|
||||
def test_metaworld_environment_determinism(self):
|
||||
"""Tests that identical seeds produce identical trajectories for Metaworld MP Envs."""
|
||||
|
@ -7,8 +7,8 @@ from dm_control import suite, manipulation
|
||||
|
||||
from alr_envs import make
|
||||
|
||||
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
|
||||
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
|
||||
DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
|
||||
MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
|
||||
SEED = 1
|
||||
|
||||
|
||||
@ -29,9 +29,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
print(env_id)
|
||||
env: gym.Env = make(env_id, seed=seed)
|
||||
rewards = []
|
||||
observations = []
|
||||
actions = []
|
||||
dones = []
|
||||
obs = env.reset()
|
||||
self._verify_observations(obs, env.observation_space, "reset()")
|
||||
@ -43,6 +45,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
|
||||
observations.append(obs)
|
||||
|
||||
ac = env.action_space.sample()
|
||||
actions.append(ac)
|
||||
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
|
||||
obs, reward, done, info = env.step(ac)
|
||||
|
||||
@ -57,13 +60,13 @@ class TestStepDMCEnvironments(unittest.TestCase):
|
||||
env.render("human")
|
||||
|
||||
if done:
|
||||
obs = env.reset()
|
||||
break
|
||||
|
||||
assert done, "Done flag is not True after max episode length."
|
||||
assert done, "Done flag is not True after end of episode."
|
||||
observations.append(obs)
|
||||
env.close()
|
||||
del env
|
||||
return np.array(observations), np.array(rewards), np.array(dones)
|
||||
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
|
||||
|
||||
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
|
||||
self.assertTrue(observation_space.contains(obs),
|
||||
@ -71,7 +74,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
|
||||
f"not contained in observation space {observation_space}.")
|
||||
|
||||
def _verify_reward(self, reward):
|
||||
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
|
||||
self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
|
||||
|
||||
def _verify_done(self, done):
|
||||
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
|
||||
@ -91,8 +94,9 @@ class TestStepDMCEnvironments(unittest.TestCase):
|
||||
traj1 = self._run_env(env_id, seed=seed)
|
||||
traj2 = self._run_env(env_id, seed=seed)
|
||||
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
|
||||
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
|
||||
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
|
||||
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
|
||||
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
|
||||
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||
|
||||
@ -111,11 +115,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
|
||||
traj1 = self._run_env(env_id, seed=seed)
|
||||
traj2 = self._run_env(env_id, seed=seed)
|
||||
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
|
||||
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
|
||||
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
|
||||
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
|
||||
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
|
||||
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -6,7 +6,7 @@ import numpy as np
|
||||
from alr_envs import make
|
||||
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
|
||||
|
||||
ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
|
||||
ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
|
||||
SEED = 1
|
||||
|
||||
|
||||
@ -57,9 +57,9 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
|
||||
env.render("human")
|
||||
|
||||
if done:
|
||||
obs = env.reset()
|
||||
break
|
||||
|
||||
assert done, "Done flag is not True after max episode length."
|
||||
assert done, "Done flag is not True after end of episode."
|
||||
observations.append(obs)
|
||||
env.close()
|
||||
del env
|
||||
@ -71,7 +71,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
|
||||
f"not contained in observation space {observation_space}.")
|
||||
|
||||
def _verify_reward(self, reward):
|
||||
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
|
||||
self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
|
||||
|
||||
def _verify_done(self, done):
|
||||
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
|
||||
@ -94,7 +94,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
|
||||
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
|
||||
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
|
||||
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
|
||||
self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user