naming convention and running tests

parent 786da2290d
commit ade83b5ae6
@@ -113,7 +113,7 @@ print("OpenAI Gym MP tasks:")
 print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)

 print("Deepmind Control MP tasks:")
-print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS)
+print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

 print("MetaWorld MP tasks:")
 print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS)
@@ -1,15 +1,14 @@
 from alr_envs import dmc, meta, open_ai
-from alr_envs.utils import make_dmc
 from alr_envs.utils.make_env_helpers import make, make_bb, make_rank

 # Convenience function for all MP environments
 from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
-from .dmc import ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS
+from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS
 from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS

-ALL_MOTION_PRIMITIVE_ENVIRONMENTS = {
-    key: value + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
+ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
+    key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
     ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
     ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key]
     for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()}
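Note: after this rename, downstream code queries the combined overview dict under its new name. A quick sanity check, assuming the package layout of this commit:

import alr_envs

# Keys are the trajectory-generator types ("DMP", "ProMP"), values the registered env ids.
for mp_type, env_ids in alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
    print(mp_type, len(env_ids))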
@@ -1,4 +1,7 @@
-from typing import Tuple, Union, Optional
+import os
+os.environ["MUJOCO_GL"] = "egl"

+from typing import Tuple, Optional
+
 import gym
 import numpy as np
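Note: MUJOCO_GL selects MuJoCo's rendering backend, and "egl" allows headless (off-screen) GPU rendering; it generally has to be set before MuJoCo/dm_control is first imported, which is why the commit places it at the top of the module. A softer variant (an assumption, not part of this commit) would only set it when the user has not already chosen a backend:

import os

# Respect a backend the user already chose, fall back to EGL otherwise.
os.environ.setdefault("MUJOCO_GL", "egl")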
@@ -67,7 +70,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):

     def observation(self, observation):
         # return context space if we are
-        obs = observation[self.env.context_mask] if self.return_context_observation else observation
+        mask = self.env.context_mask
+        if self.is_time_aware:
+            mask = np.append(mask, False)
+        obs = observation[mask] if self.return_context_observation else observation
         # cast dtype because metaworld returns incorrect that throws gym error
         return obs.astype(self.observation_space.dtype)

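The idea of the change above: the context mask selects which observation entries form the context, and when the wrapper is time-aware the raw observation carries one extra time entry that must never be part of the context, so the mask is padded with False. A standalone sketch of that masking logic (the attribute name is_time_aware follows the diff; the example data is made up):

import numpy as np

context_mask = np.array([True, False, True])   # which entries belong to the context
observation = np.array([0.1, 0.2, 0.3, 0.99])  # last entry: normalized time
is_time_aware = True

mask = np.append(context_mask, False) if is_time_aware else context_mask
print(observation[mask])  # -> [0.1 0.3], the time entry is dropped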
@@ -2,7 +2,7 @@ from copy import deepcopy

 from . import manipulation, suite

-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}

 from gym.envs.registration import register

@@ -47,10 +47,9 @@ DEFAULT_BB_DICT_DMP = {
     }
 }

-
 # DeepMind Control Suite (DMC)
 kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_bic_dmp['name'] = f"ball_in_cup-catch"
+kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch"
 kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper)
 kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10  # TODO: weight scale 1, but goal scale 0.1
@@ -58,304 +57,313 @@ kwargs_dict_bic_dmp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_ball_in_cup-catch_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"ball_in_cup-catch",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.ball_in_cup.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "motor",
-            "goal_scale": 0.1,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_bic_dmp
+    # {
+    #     "name": f"ball_in_cup-catch",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.ball_in_cup.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "motor",
+    #         "goal_scale": 0.1,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")

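The pattern repeated for every entry below: build one kwargs dict per task, register it with the shared make_bb_env_helper entry point, and append the id to the DMP/ProMP lists. A hedged sketch of how such a registered id is then meant to be used (the env id comes from this hunk; seed and the rollout loop are illustrative):

import alr_envs

env = alr_envs.make('dmc_ball_in_cup-catch_dmp-v0', 1)
obs = env.reset()
done = False
while not done:
    # One step of a black-box MP env executes a whole trajectory in the underlying task.
    obs, reward, done, info = env.step(env.action_space.sample())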
 kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_bic_promp['name'] = f"ball_in_cup-catch"
+kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
 kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
 kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_ball_in_cup-catch_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    kwargs={
-        "name": f"ball_in_cup-catch",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.ball_in_cup.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "policy_type": "motor",
-            "zero_start": True,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_bic_promp
+    # {
+    #     "name": f"ball_in_cup-catch",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.ball_in_cup.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "policy_type": "motor",
+    #         "zero_start": True,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")

 kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_reacher_easy_dmp['name'] = f"reacher-easy"
+kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
 kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+# TODO: weight scale 50, but goal scale 0.1
+kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_reacher-easy_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"reacher-easy",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.reacher.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "motor",
-            "weights_scale": 50,
-            "goal_scale": 0.1,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_bic_dmp
+    # {
+    #     "name": f"reacher-easy",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.reacher.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "motor",
+    #         "weights_scale": 50,
+    #         "goal_scale": 0.1,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")

 kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_reacher_easy_promp['name'] = f"reacher-easy"
+kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
 kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1
 kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-easy_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    kwargs={
-        "name": f"reacher-easy",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.reacher.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_reacher_easy_promp
+    # {
+    #     "name": f"reacher-easy",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.reacher.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "policy_type": "motor",
+    #         "weights_scale": 0.2,
+    #         "zero_start": True,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")

 kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_reacher_hard_dmp['name'] = f"reacher-hard"
+kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard"
 kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+# TODO: weight scale 50, but goal scale 0.1
+kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1
 register(
     id=f'dmc_reacher-hard_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"reacher-hard",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.reacher.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "motor",
-            "weights_scale": 50,
-            "goal_scale": 0.1,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_reacher_hard_dmp
+    # {
+    #     "name": f"reacher-hard",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.reacher.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "motor",
+    #         "weights_scale": 50,
+    #         "goal_scale": 0.1,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")

 kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_reacher_hard_promp['name'] = f"reacher-hard"
+kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard"
 kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50
 kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1
 kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-hard_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    kwargs={
-        "name": f"reacher-hard",
-        "time_limit": 20,
-        "episode_length": 1000,
-        "wrappers": [suite.reacher.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 2,
-            "num_basis": 5,
-            "duration": 20,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "policy_kwargs": {
-                "p_gains": 50,
-                "d_gains": 1
-            }
-        }
-    }
+    kwargs=kwargs_dict_reacher_hard_promp
+    # {
+    #     "name": f"reacher-hard",
+    #     "time_limit": 20,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.reacher.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 2,
+    #         "num_basis": 5,
+    #         "duration": 20,
+    #         "policy_type": "motor",
+    #         "weights_scale": 0.2,
+    #         "zero_start": True,
+    #         "policy_kwargs": {
+    #             "p_gains": 50,
+    #             "d_gains": 1
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")

 _dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"]

 for _task in _dmc_cartpole_tasks:
     _env_id = f'dmc_cartpole-{_task}_dmp-v0'
     kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-    kwargs_dict_cartpole_dmp['name'] = f"cartpole-{_task}"
+    kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}"
-    kwargs_dict_cartpole_dmp['camera_id'] = 0
     kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper)
     kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-    kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+    kwargs_dict_cartpole_dmp['trajectory_generator_kwargs'][
+        'weight_scale'] = 500  # TODO: weight scale 50, but goal scale 0.1
     kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10
     kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         # max_episode_steps=1,
-        kwargs={
-            "name": f"cartpole-{_task}",
-            # "time_limit": 1,
-            "camera_id": 0,
-            "episode_length": 1000,
-            "wrappers": [suite.cartpole.MPWrapper],
-            "traj_gen_kwargs": {
-                "num_dof": 1,
-                "num_basis": 5,
-                "duration": 10,
-                "learn_goal": True,
-                "alpha_phase": 2,
-                "bandwidth_factor": 2,
-                "policy_type": "motor",
-                "weights_scale": 50,
-                "goal_scale": 0.1,
-                "policy_kwargs": {
-                    "p_gains": 10,
-                    "d_gains": 10
-                }
-            }
-        }
+        kwargs=kwargs_dict_cartpole_dmp
+        # {
+        #     "name": f"cartpole-{_task}",
+        #     # "time_limit": 1,
+        #     "camera_id": 0,
+        #     "episode_length": 1000,
+        #     "wrappers": [suite.cartpole.MPWrapper],
+        #     "traj_gen_kwargs": {
+        #         "num_dof": 1,
+        #         "num_basis": 5,
+        #         "duration": 10,
+        #         "learn_goal": True,
+        #         "alpha_phase": 2,
+        #         "bandwidth_factor": 2,
+        #         "policy_type": "motor",
+        #         "weights_scale": 50,
+        #         "goal_scale": 0.1,
+        #         "policy_kwargs": {
+        #             "p_gains": 10,
+        #             "d_gains": 10
+        #         }
+        #     }
+        # }
     )
-    ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)

     _env_id = f'dmc_cartpole-{_task}_promp-v0'
     kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-    kwargs_dict_cartpole_promp['name'] = f"cartpole-{_task}"
+    kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}"
-    kwargs_dict_cartpole_promp['camera_id'] = 0
     kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper)
     kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10
     kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10
     kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-        kwargs={
-            "name": f"cartpole-{_task}",
-            # "time_limit": 1,
-            "camera_id": 0,
-            "episode_length": 1000,
-            "wrappers": [suite.cartpole.MPWrapper],
-            "traj_gen_kwargs": {
-                "num_dof": 1,
-                "num_basis": 5,
-                "duration": 10,
-                "policy_type": "motor",
-                "weights_scale": 0.2,
-                "zero_start": True,
-                "policy_kwargs": {
-                    "p_gains": 10,
-                    "d_gains": 10
-                }
-            }
-        }
+        kwargs=kwargs_dict_cartpole_promp
+        # {
+        #     "name": f"cartpole-{_task}",
+        #     # "time_limit": 1,
+        #     "camera_id": 0,
+        #     "episode_length": 1000,
+        #     "wrappers": [suite.cartpole.MPWrapper],
+        #     "traj_gen_kwargs": {
+        #         "num_dof": 1,
+        #         "num_basis": 5,
+        #         "duration": 10,
+        #         "policy_type": "motor",
+        #         "weights_scale": 0.2,
+        #         "zero_start": True,
+        #         "policy_kwargs": {
+        #             "p_gains": 10,
+        #             "d_gains": 10
+        #         }
+        #     }
+        # }
     )
-    ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

 kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole2poles_dmp['name'] = f"cartpole-two_poles"
+kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles"
-kwargs_dict_cartpole2poles_dmp['camera_id'] = 0
 kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
 kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+# TODO: weight scale 50, but goal scale 0.1
+kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10
 kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
 _env_id = f'dmc_cartpole-two_poles_dmp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"cartpole-two_poles",
-        # "time_limit": 1,
-        "camera_id": 0,
-        "episode_length": 1000,
-        "wrappers": [suite.cartpole.TwoPolesMPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 1,
-            "num_basis": 5,
-            "duration": 10,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "motor",
-            "weights_scale": 50,
-            "goal_scale": 0.1,
-            "policy_kwargs": {
-                "p_gains": 10,
-                "d_gains": 10
-            }
-        }
-    }
+    kwargs=kwargs_dict_cartpole2poles_dmp
+    # {
+    #     "name": f"cartpole-two_poles",
+    #     # "time_limit": 1,
+    #     "camera_id": 0,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.cartpole.TwoPolesMPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 1,
+    #         "num_basis": 5,
+    #         "duration": 10,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "motor",
+    #         "weights_scale": 50,
+    #         "goal_scale": 0.1,
+    #         "policy_kwargs": {
+    #             "p_gains": 10,
+    #             "d_gains": 10
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)

 kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole2poles_promp['name'] = f"cartpole-two_poles"
+kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles"
-kwargs_dict_cartpole2poles_promp['camera_id'] = 0
 kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
 kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10
 kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10
@@ -363,70 +371,71 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
 _env_id = f'dmc_cartpole-two_poles_promp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    kwargs={
-        "name": f"cartpole-two_poles",
-        # "time_limit": 1,
-        "camera_id": 0,
-        "episode_length": 1000,
-        "wrappers": [suite.cartpole.TwoPolesMPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 1,
-            "num_basis": 5,
-            "duration": 10,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "policy_kwargs": {
-                "p_gains": 10,
-                "d_gains": 10
-            }
-        }
-    }
+    kwargs=kwargs_dict_cartpole2poles_promp
+    # {
+    #     "name": f"cartpole-two_poles",
+    #     # "time_limit": 1,
+    #     "camera_id": 0,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.cartpole.TwoPolesMPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 1,
+    #         "num_basis": 5,
+    #         "duration": 10,
+    #         "policy_type": "motor",
+    #         "weights_scale": 0.2,
+    #         "zero_start": True,
+    #         "policy_kwargs": {
+    #             "p_gains": 10,
+    #             "d_gains": 10
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

 kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole3poles_dmp['name'] = f"cartpole-three_poles"
+kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles"
-kwargs_dict_cartpole3poles_dmp['camera_id'] = 0
 kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
 kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+# TODO: weight scale 50, but goal scale 0.1
+kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10
 kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
 _env_id = f'dmc_cartpole-three_poles_dmp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"cartpole-three_poles",
-        # "time_limit": 1,
-        "camera_id": 0,
-        "episode_length": 1000,
-        "wrappers": [suite.cartpole.ThreePolesMPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 1,
-            "num_basis": 5,
-            "duration": 10,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "motor",
-            "weights_scale": 50,
-            "goal_scale": 0.1,
-            "policy_kwargs": {
-                "p_gains": 10,
-                "d_gains": 10
-            }
-        }
-    }
+    kwargs=kwargs_dict_cartpole3poles_dmp
+    # {
+    #     "name": f"cartpole-three_poles",
+    #     # "time_limit": 1,
+    #     "camera_id": 0,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.cartpole.ThreePolesMPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 1,
+    #         "num_basis": 5,
+    #         "duration": 10,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "motor",
+    #         "weights_scale": 50,
+    #         "goal_scale": 0.1,
+    #         "policy_kwargs": {
+    #             "p_gains": 10,
+    #             "d_gains": 10
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)

 kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_cartpole3poles_promp['name'] = f"cartpole-three_poles"
+kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles"
-kwargs_dict_cartpole3poles_promp['camera_id'] = 0
 kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
 kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10
 kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10
@@ -434,81 +443,85 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
 _env_id = f'dmc_cartpole-three_poles_promp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    kwargs={
-        "name": f"cartpole-three_poles",
-        # "time_limit": 1,
-        "camera_id": 0,
-        "episode_length": 1000,
-        "wrappers": [suite.cartpole.ThreePolesMPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 1,
-            "num_basis": 5,
-            "duration": 10,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "policy_kwargs": {
-                "p_gains": 10,
-                "d_gains": 10
-            }
-        }
-    }
+    kwargs=kwargs_dict_cartpole3poles_promp
+    # {
+    #     "name": f"cartpole-three_poles",
+    #     # "time_limit": 1,
+    #     "camera_id": 0,
+    #     "episode_length": 1000,
+    #     "wrappers": [suite.cartpole.ThreePolesMPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 1,
+    #         "num_basis": 5,
+    #         "duration": 10,
+    #         "policy_type": "motor",
+    #         "weights_scale": 0.2,
+    #         "zero_start": True,
+    #         "policy_kwargs": {
+    #             "p_gains": 10,
+    #             "d_gains": 10
+    #         }
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)

 # DeepMind Manipulation
 kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_mani_reach_site_features_dmp['name'] = f"manipulation-reach_site_features"
+kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features"
 kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper)
 kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
+# TODO: weight scale 50, but goal scale 0.1
+kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
 register(
     id=f'dmc_manipulation-reach_site_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
-    kwargs={
-        "name": f"manipulation-reach_site_features",
-        # "time_limit": 1,
-        "episode_length": 250,
-        "wrappers": [manipulation.reach_site.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 9,
-            "num_basis": 5,
-            "duration": 10,
-            "learn_goal": True,
-            "alpha_phase": 2,
-            "bandwidth_factor": 2,
-            "policy_type": "velocity",
-            "weights_scale": 50,
-            "goal_scale": 0.1,
-        }
-    }
+    kwargs=kwargs_dict_mani_reach_site_features_dmp
+    # {
+    #     "name": f"manipulation-reach_site_features",
+    #     # "time_limit": 1,
+    #     "episode_length": 250,
+    #     "wrappers": [manipulation.reach_site.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 9,
+    #         "num_basis": 5,
+    #         "duration": 10,
+    #         "learn_goal": True,
+    #         "alpha_phase": 2,
+    #         "bandwidth_factor": 2,
+    #         "policy_type": "velocity",
+    #         "weights_scale": 50,
+    #         "goal_scale": 0.1,
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")

 kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP)
-kwargs_dict_mani_reach_site_features_promp['name'] = f"manipulation-reach_site_features"
+kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features"
 kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper)
 kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
 register(
     id=f'dmc_manipulation-reach_site_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-    kwargs={
-        "name": f"manipulation-reach_site_features",
-        # "time_limit": 1,
-        "episode_length": 250,
-        "wrappers": [manipulation.reach_site.MPWrapper],
-        "traj_gen_kwargs": {
-            "num_dof": 9,
-            "num_basis": 5,
-            "duration": 10,
-            "policy_type": "velocity",
-            "weights_scale": 0.2,
-            "zero_start": True,
-        }
-    }
+    kwargs=kwargs_dict_mani_reach_site_features_promp
+    # {
+    #     "name": f"manipulation-reach_site_features",
+    #     # "time_limit": 1,
+    #     "episode_length": 250,
+    #     "wrappers": [manipulation.reach_site.MPWrapper],
+    #     "traj_gen_kwargs": {
+    #         "num_dof": 9,
+    #         "num_basis": 5,
+    #         "duration": 10,
+    #         "policy_type": "velocity",
+    #         "weights_scale": 0.2,
+    #         "zero_start": True,
+    #     }
+    # }
 )
-ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
+ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
@@ -2,17 +2,22 @@
 # License: MIT
 # Copyright (c) 2020 Denis Yarats
 import collections
-from typing import Any, Dict, Tuple
+from collections.abc import MutableMapping
+from typing import Any, Dict, Tuple, Optional, Union, Callable

+from dm_control import composer
+import gym
 import numpy as np
-from dm_control import manipulation, suite
+from dm_control.rl import control
 from dm_env import specs
-from gym import core, spaces
+from gym import spaces
+from gym.core import ObsType


 def _spec_to_box(spec):
     def extract_min_max(s):
-        assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
+        assert s.dtype == np.float64 or s.dtype == np.float32, \
+            f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
         dim = int(np.prod(s.shape))
         if type(s) == specs.Array:
             bound = np.inf * np.ones(dim, dtype=s.dtype)
@@ -32,7 +37,7 @@ def _spec_to_box(spec):
         return spaces.Box(low, high, dtype=s.dtype)


-def _flatten_obs(obs: collections.MutableMapping):
+def _flatten_obs(obs: MutableMapping):
     """
     Flattens an observation of type MutableMapping, e.g. a dict to a 1D array.
     Args:
@@ -42,7 +47,7 @@ def _flatten_obs(obs: collections.MutableMapping):

     """

-    if not isinstance(obs, collections.MutableMapping):
+    if not isinstance(obs, MutableMapping):
         raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')

     # Keep key order consistent for non OrderedDicts
@@ -52,47 +57,19 @@ def _flatten_obs(obs: collections.MutableMapping):
     return np.concatenate(obs_vals)


-class DMCWrapper(core.Env):
-    def __init__(
-        self,
-        domain_name: str,
-        task_name: str,
-        task_kwargs: dict = {},
-        visualize_reward: bool = True,
-        from_pixels: bool = False,
-        height: int = 84,
-        width: int = 84,
-        camera_id: int = 0,
-        frame_skip: int = 1,
-        environment_kwargs: dict = None,
-        channels_first: bool = True
-    ):
-        assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.'
-        self._from_pixels = from_pixels
-        self._height = height
-        self._width = width
-        self._camera_id = camera_id
-        self._frame_skip = frame_skip
-        self._channels_first = channels_first
+class DMCWrapper(gym.Env):
+    def __init__(self,
+                 env: Callable[[], Union[composer.Environment, control.Environment]],
+                 ):

-        # create task
-        if domain_name == "manipulation":
-            assert not from_pixels and not task_name.endswith("_vision"), \
-                "TODO: Vision interface for manipulation is different to suite and needs to be implemented"
-            self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random'])
-        else:
-            self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
-                                   visualize_reward=visualize_reward, environment_kwargs=environment_kwargs)
+        # TODO: Currently this is required to be a function because dmc does not allow to copy composers environments
+        self._env = env()

         # action and observation space
         self._action_space = _spec_to_box([self._env.action_spec()])
         self._observation_space = _spec_to_box(self._env.observation_spec().values())

-        self._last_state = None
-        self.viewer = None
-
-        # set seed
-        self.seed(seed=task_kwargs.get('random', 1))
+        self._window = None

     def __getattr__(self, item):
         """Propagate only non-existent properties to wrapped env."""
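With this change the wrapper no longer loads the task itself; the caller passes a zero-argument factory and the wrapper instantiates the environment from it. A minimal sketch of constructing it directly (dm_control's suite.load is the standard loader; wrapping it in a lambda mirrors the new signature, and the seed value is illustrative):

from dm_control import suite

# The factory is called exactly once inside DMCWrapper.__init__ (self._env = env()).
wrapped = DMCWrapper(env=lambda: suite.load(domain_name="cartpole", task_name="swingup",
                                            task_kwargs={"random": 1}))
obs = wrapped.reset()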
@@ -103,17 +80,7 @@ class DMCWrapper(core.Env):
         return getattr(self._env, item)

     def _get_obs(self, time_step):
-        if self._from_pixels:
-            obs = self.render(
-                mode="rgb_array",
-                height=self._height,
-                width=self._width,
-                camera_id=self._camera_id
-            )
-            if self._channels_first:
-                obs = obs.transpose(2, 0, 1).copy()
-        else:
-            obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
+        obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
         return obs

     @property
@@ -126,20 +93,7 @@ class DMCWrapper(core.Env):

     @property
     def dt(self):
-        return self._env.control_timestep() * self._frame_skip
+        return self._env.control_timestep()

-    @property
-    def base_step_limit(self):
-        """
-        Returns: max_episode_steps of the underlying DMC env
-
-        """
-        # Accessing private attribute because DMC does not expose time_limit or step_limit.
-        # Only the current time_step/time as well as the control_timestep can be accessed.
-        try:
-            return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip
-        except AttributeError as e:
-            return self._env._time_limit / self.dt
-
     def seed(self, seed=None):
         self._action_space.seed(seed)
@@ -147,56 +101,71 @@ class DMCWrapper(core.Env):

     def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
         assert self._action_space.contains(action)
-        reward = 0
         extra = {'internal_state': self._env.physics.get_state().copy()}

-        for _ in range(self._frame_skip):
-            time_step = self._env.step(action)
-            reward += time_step.reward or 0.
-            done = time_step.last()
-            if done:
-                break
+        time_step = self._env.step(action)
+        reward = time_step.reward or 0.
+        done = time_step.last()

-        self._last_state = _flatten_obs(time_step.observation)
         obs = self._get_obs(time_step)
         extra['discount'] = time_step.discount

         return obs, reward, done, extra

-    def reset(self) -> np.ndarray:
+    def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
+              options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
         time_step = self._env.reset()
-        self._last_state = _flatten_obs(time_step.observation)
         obs = self._get_obs(time_step)
         return obs

-    def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
-        if self._last_state is None:
-            raise ValueError('Environment not ready to render. Call reset() first.')
-
-        camera_id = camera_id or self._camera_id
+    def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False,
+               segmentation=False, scene_option=None, render_flag_overrides=None):

         # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
         if mode == "rgb_array":
-            height = height or self._height
-            width = width or self._width
-            return self._env.physics.render(height=height, width=width, camera_id=camera_id)
+            return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays,
+                                            depth=depth, segmentation=segmentation, scene_option=scene_option,
+                                            render_flag_overrides=render_flag_overrides)

-        elif mode == 'human':
-            if self.viewer is None:
-                # pylint: disable=import-outside-toplevel
-                # pylint: disable=g-import-not-at-top
-                from gym.envs.classic_control import rendering
-                self.viewer = rendering.SimpleImageViewer()
-            # Render max available buffer size. Larger is only possible by altering the XML.
-            img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
-                                           width=self._env.physics.model.vis.global_.offwidth,
-                                           camera_id=camera_id)
-            self.viewer.imshow(img)
-            return self.viewer.isopen
+        # Render max available buffer size. Larger is only possible by altering the XML.
+        img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
+                                       width=self._env.physics.model.vis.global_.offwidth,
+                                       camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation,
+                                       scene_option=scene_option, render_flag_overrides=render_flag_overrides)
+
+        if depth:
+            img = np.dstack([img.astype(np.uint8)] * 3)
+
+        if mode == 'human':
+            try:
+                import cv2
+                if self._window is None:
+                    self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE)
+
+                cv2.imshow(self.id, img[..., ::-1])  # Image in BGR
+                cv2.waitKey(1)
+            except ImportError:
+                import pygame
+                img = img.transpose((1, 0, 2))
+                if self._window is None:
+                    pygame.init()
+                    pygame.display.init()
+                    self._window = pygame.display.set_mode(img.shape[:2])
+
+                self._window.blit(pygame.surfarray.make_surface(img), (0, 0))
+                pygame.event.pump()
+                pygame.display.flip()

     def close(self):
         super().close()
-        if self.viewer is not None and self.viewer.isopen:
-            self.viewer.close()
+        if self._window is not None:
+            try:
+                import cv2
+                cv2.destroyWindow(self.id)
+            except ImportError:
+                import pygame
+
+                pygame.display.quit()
+                pygame.quit()

     @property
     def reward_range(self) -> Tuple[float, float]:
@@ -204,3 +173,8 @@ class DMCWrapper(core.Env):
         if isinstance(reward_spec, specs.BoundedArray):
             return reward_spec.minimum, reward_spec.maximum
         return -float('inf'), float('inf')
+
+    @property
+    def metadata(self):
+        return {'render.modes': ['human', 'rgb_array'],
+                'video.frames_per_second': round(1.0 / self._env.control_timestep())}
@@ -1,3 +1,5 @@
+import numpy as np
+
 import alr_envs


@@ -59,7 +61,8 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
     """
     # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
     # E.g. here for way to many basis functions
-    env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
+    # env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
+    env = alr_envs.make(env_name, seed)
     # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
     # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})

@@ -72,15 +75,16 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render

     # number of samples/full trajectories (multiple environment steps)
     for i in range(iterations):
-        ac = env.action_space.sample() * 1000
+        ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
         rewards += reward

         if done:
-            print(rewards)
+            print(i, rewards)
             rewards = 0
             obs = env.reset()
-    print(obs)
+
+    return obs


 def example_fully_custom_mp(seed=1, iterations=1, render=True):
@@ -139,7 +143,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):


 if __name__ == '__main__':
-    render = True
+    render = False
     # # DMP
     # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
     #
@@ -150,7 +154,7 @@ if __name__ == '__main__':
     # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)

     # Altered basis functions
-    example_custom_mp("HopperJumpSparseProMP-v0", seed=10, iterations=10, render=render)
+    obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)

     # Custom MP
     # example_fully_custom_mp(seed=10, iterations=1, render=render)
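Note: the example above uses the prefix convention introduced by this commit, where step-based tasks of a wrapped suite are addressed by a suite prefix in the name (e.g. "dmc:...", "metaworld:..."). A hedged sketch with a standard DMC task (the exact task name is illustrative):

import alr_envs

env = alr_envs.make("dmc:cartpole-swingup", 1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())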
@@ -36,7 +36,7 @@ for _task in _goal_change_envs:
     _env_id = f'{name}ProMP-{task_id_split[-1]}'
     kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
     kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_change_promp['name'] = _task
+    kwargs_dict_goal_change_promp['name'] = f'metaworld:{_task}'

     register(
         id=_env_id,
@@ -52,7 +52,7 @@ for _task in _object_change_envs:
     _env_id = f'{name}ProMP-{task_id_split[-1]}'
     kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
     kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper)
-    kwargs_dict_object_change_promp['name'] = _task
+    kwargs_dict_object_change_promp['name'] = f'metaworld:{_task}'
     register(
         id=_env_id,
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
@@ -77,7 +77,7 @@ for _task in _goal_and_object_change_envs:
     _env_id = f'{name}ProMP-{task_id_split[-1]}'
     kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
     kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper)
-    kwargs_dict_goal_and_object_change_promp['name'] = _task
+    kwargs_dict_goal_and_object_change_promp['name'] = f'metaworld:{_task}'

     register(
         id=_env_id,
@ -93,7 +93,7 @@ for _task in _goal_and_endeffector_change_envs:
|
|||||||
_env_id = f'{name}ProMP-{task_id_split[-1]}'
|
_env_id = f'{name}ProMP-{task_id_split[-1]}'
|
||||||
kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||||
kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
|
kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
|
||||||
kwargs_dict_goal_and_endeffector_change_promp['name'] = _task
|
kwargs_dict_goal_and_endeffector_change_promp['name'] = f'metaworld:{_task}'
|
||||||
|
|
||||||
register(
|
register(
|
||||||
id=_env_id,
|
id=_env_id,
|
||||||
|
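The four hunks above only touch the 'name' entry of each kwargs dict: the MetaWorld step environment backing a registered ProMP id is now addressed with an explicit 'metaworld:' framework prefix. A minimal usage sketch of the resulting convention (the task id below is illustrative and not taken from this diff):

    from alr_envs import make

    # 'metaworld:<task>' is resolved by the metaworld branch of the env factory
    env = make('metaworld:button-press-v2', seed=1)
    obs = env.reset()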
@@ -27,7 +27,6 @@ DEFAULT_BB_DICT_ProMP = {
     }
 }
 
-
 kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
 kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6
 kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075
@@ -35,7 +34,7 @@ kwargs_dict_reacher_promp['basis_generator_kwargs']['num_basis'] = 6
 kwargs_dict_reacher_promp['name'] = "Reacher-v2"
 kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper)
 register(
-    id='Reacher2dProMP-v2',
+    id='ReacherProMP-v2',
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_reacher_promp
 )
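Besides the dropped blank line, this file only renames the registered id from 'Reacher2dProMP-v2' to 'ReacherProMP-v2'; the underlying step environment stays 'Reacher-v2'. A minimal sketch of constructing the renamed environment through the package helper (rollout written with the 4-tuple gym step API used throughout this commit):

    from alr_envs import make

    env = make('ReacherProMP-v2', seed=1)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())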
@@ -1,65 +1 @@
-import re
-from typing import Union
-
-import gym
-from gym.envs.registration import register
-
-from alr_envs.utils.make_env_helpers import make
-
-
-def make_dmc(
-        id: str,
-        seed: int = 1,
-        visualize_reward: bool = True,
-        from_pixels: bool = False,
-        height: int = 84,
-        width: int = 84,
-        camera_id: int = 0,
-        frame_skip: int = 1,
-        episode_length: Union[None, int] = None,
-        environment_kwargs: dict = {},
-        time_limit: Union[None, float] = None,
-        channels_first: bool = True
-):
-    # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
-    # License: MIT
-    # Copyright (c) 2020 Denis Yarats
-
-    if not re.match(r"\w+-\w+", id):
-        raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
-    domain_name, task_name = id.split("-")
-
-    env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
-
-    if from_pixels:
-        assert not visualize_reward, 'Cannot use visualize reward when learning from pixels.'
-
-    # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
-    episode_length = episode_length or (250 if domain_name == "manipulation" else 1000)
-
-    max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
-    if env_id not in gym.envs.registry.env_specs:
-        task_kwargs = {'random': seed}
-        # if seed is not None:
-        #     task_kwargs['random'] = seed
-        if time_limit is not None:
-            task_kwargs['time_limit'] = time_limit
-        register(
-            id=env_id,
-            entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
-            kwargs=dict(
-                domain_name=domain_name,
-                task_name=task_name,
-                task_kwargs=task_kwargs,
-                environment_kwargs=environment_kwargs,
-                visualize_reward=visualize_reward,
-                from_pixels=from_pixels,
-                height=height,
-                width=width,
-                camera_id=camera_id,
-                frame_skip=frame_skip,
-                channels_first=channels_first,
-            ),
-            max_episode_steps=max_episode_steps,
-        )
-    return gym.make(env_id)
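The standalone make_dmc module above is deleted; a reworked version that loads the dm_control task directly and registers it under a random uuid id is added to make_env_helpers later in this commit. A minimal call sketch, reusing the manipulation task id that appears in the examples file of this diff:

    from alr_envs.utils.make_env_helpers import make_dmc

    env = make_dmc('manipulation-stack_2_bricks_features', seed=1)
    obs = env.reset()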
@@ -1,20 +1,41 @@
-import warnings
+import re
+import uuid
+from collections.abc import MutableMapping
 from copy import deepcopy
-from typing import Iterable, Type, Union, MutableMapping
+from math import ceil
+from typing import Iterable, Type, Union
 
 import gym
 import numpy as np
-from gym.envs.registration import EnvSpec, registry
+import alr_envs
 
+try:
+    from dm_control import suite, manipulation, composer
+    from dm_control.rl import control
+except ImportError:
+    pass
+
+try:
+    import metaworld
+except Exception:
+    # catch Exception due to Mujoco-py
+    pass
+
+from gym.envs.registration import registry
+from gym.envs.registration import register
 from gym.wrappers import TimeAwareObservation
 
 from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
-from alr_envs.black_box.factory.controller_factory import get_controller
 from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
+from alr_envs.black_box.factory.controller_factory import get_controller
 from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
 from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
 from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
 from alr_envs.utils.utils import nested_update
 
+ALL_FRAMEWORK_TYPES = ['meta', 'dmc', 'gym']
+
 
 def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
     """
@@ -70,57 +91,25 @@ def _make(env_id: str, seed, **kwargs):
     # env_id.split(':')
     # if 'dmc' :
 
-    try:
-        # This access is required to allow for nested dict updates for BB envs
-        spec = registry.get(env_id)
-        all_kwargs = deepcopy(spec.kwargs)
-        nested_update(all_kwargs, kwargs)
-        kwargs = all_kwargs
-
-        # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
-        if env_id.startswith("dmc"):
-            kwargs.update({"seed": seed})
-
-        # Gym
-        env = gym.make(env_id, **kwargs)
-        env.seed(seed)
-        env.action_space.seed(seed)
-        env.observation_space.seed(seed)
-    except (gym.error.Error, AttributeError):
+    if ':' in env_id:
+        split_id = env_id.split(':')
+        framework, env_id = split_id[-2:]
+    else:
+        framework = None
 
+    if framework == 'metaworld':
         # MetaWorld env
-        import metaworld
-        if env_id in metaworld.ML1.ENV_NAMES:
-            env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
-
-            # setting this avoids generating the same initialization after each reset
-            env._freeze_rand_vec = False
-            env.seeded_rand_vec = True
-
-            # Manually set spec, as metaworld environments are not registered via gym
-            env.unwrapped.spec = EnvSpec(env_id)
-            # Set Timelimit based on the maximum allowed path length of the environment
-            env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
-            # env.seed(seed)
-            # env.action_space.seed(seed)
-            # env.observation_space.seed(seed)
-            # env.goal_space.seed(seed)
-
-        else:
-            # DMC
-            from alr_envs import make_dmc
-            env = make_dmc(env_id, seed=seed, **kwargs)
-
-            if not env.base_step_limit == env.spec.max_episode_steps:
-                raise ValueError(f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym "
-                                 f"is different from the DMC environment specification of {env.base_step_limit} steps.")
+        env = make_metaworld(env_id, seed=seed, **kwargs)
+    elif framework == 'dmc':
+        # DeepMind Controlp
+        env = make_dmc(env_id, seed=seed, **kwargs)
+    else:
+        env = make_gym(env_id, seed=seed, **kwargs)
 
     return env
 
 
-def _make_wrapped_env(
-        env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs
-):
+def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
     """
     Helper function for creating a wrapped gym environment using MPs.
     It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
@@ -149,7 +138,7 @@ def _make_wrapped_env(
 
 def make_bb(
         env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
-        controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
+        controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1,
         **kwargs):
     """
     This can also be used standalone for manually building a custom DMP environment.
@@ -167,7 +156,6 @@ def make_bb(
 
     """
    _verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
-    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
 
    learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories')
    do_replanning = black_box_kwargs.get('replanning_schedule')
@@ -176,12 +164,16 @@ def make_bb(
 
     if learn_sub_trajs or do_replanning:
         # add time_step observation when replanning
-        kwargs['wrappers'].append(TimeAwareObservation)
+        if not any(issubclass(w, TimeAwareObservation) for w in kwargs['wrappers']):
+            # Add as first wrapper in order to alter observation
+            kwargs['wrappers'].insert(0, TimeAwareObservation)
 
-    traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item())
+    env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
+
+    traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item())
 
     if black_box_kwargs.get('duration') is None:
-        black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
+        black_box_kwargs['duration'] = env.spec.max_episode_steps * env.dt
     if phase_kwargs.get('tau') is None:
         phase_kwargs['tau'] = black_box_kwargs['duration']
 
@@ -194,7 +186,7 @@ def make_bb(
     controller = get_controller(**controller_kwargs)
     traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
 
-    bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller,
+    bb_env = BlackBoxWrapper(env, trajectory_generator=traj_gen, tracking_controller=controller,
                              **black_box_kwargs)
 
     return bb_env
@@ -249,6 +241,109 @@ def make_bb_env_helper(**kwargs):
                    basis_kwargs=basis_kwargs, **kwargs, seed=seed)
 
 
+def make_dmc(
+        env_id: Union[str, composer.Environment, control.Environment],
+        seed: int = None,
+        visualize_reward: bool = True,
+        time_limit: Union[None, float] = None,
+        **kwargs
+):
+    if not re.match(r"\w+-\w+", env_id):
+        raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
+    domain_name, task_name = env_id.split("-")
+
+    if task_name.endswith("_vision"):
+        # TODO
+        raise ValueError("The vision interface for manipulation tasks is currently not supported.")
+
+    if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL:
+        raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.')
+
+    # env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
+    gym_id = uuid.uuid4().hex + '-v1'
+
+    task_kwargs = {'random': seed}
+    if time_limit is not None:
+        task_kwargs['time_limit'] = time_limit
+
+    # create task
+    # Accessing private attribute because DMC does not expose time_limit or step_limit.
+    # Only the current time_step/time as well as the control_timestep can be accessed.
+    if domain_name == "manipulation":
+        env = manipulation.load(environment_name=task_name, seed=seed)
+        max_episode_steps = ceil(env._time_limit / env.control_timestep())
+    else:
+        env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
+                         visualize_reward=visualize_reward, environment_kwargs=kwargs)
+        max_episode_steps = int(env._step_limit)
+
+    register(
+        id=gym_id,
+        entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
+        kwargs={'env': lambda: env},
+        max_episode_steps=max_episode_steps,
+    )
+
+    env = gym.make(gym_id)
+    env.seed(seed=seed)
+    return env
+
+
+def make_metaworld(env_id, seed, **kwargs):
+    if env_id not in metaworld.ML1.ENV_NAMES:
+        raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.')
+
+    _env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
+
+    # setting this avoids generating the same initialization after each reset
+    _env._freeze_rand_vec = False
+    # New argument to use global seeding
+    _env.seeded_rand_vec = True
+
+    # Manually set spec, as metaworld environments are not registered via gym
+    # _env.unwrapped.spec = EnvSpec(env_id)
+    # Set Timelimit based on the maximum allowed path length of the environment
+    # _env = gym.wrappers.TimeLimit(_env, max_episode_steps=_env.max_path_length)
+    # _env.seed(seed)
+    # _env.action_space.seed(seed)
+    # _env.observation_space.seed(seed)
+    # _env.goal_space.seed(seed)
+
+    gym_id = uuid.uuid4().hex + '-v1'
+
+    register(
+        id=gym_id,
+        entry_point=lambda: _env,
+        max_episode_steps=_env.max_path_length,
+    )
+
+    # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld
+    env = gym.make(gym_id, disable_env_checker=True)
+    env.seed(seed=seed)
+    return env
+
+
+def make_gym(env_id, seed, **kwargs):
+    # This access is required to allow for nested dict updates for BB envs
+    spec = registry.get(env_id)
+    all_kwargs = deepcopy(spec.kwargs)
+    nested_update(all_kwargs, kwargs)
+    kwargs = all_kwargs
+
+    # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
+    # if env_id.startswith("dmc") or any(s in env_id.lower() for s in ['promp', 'dmp', 'prodmp']):
+    all_bb_envs = sum(alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values(), [])
+    if env_id.startswith("dmc") or env_id in all_bb_envs:
+        kwargs.update({"seed": seed})
+
+    # Gym
+    env = gym.make(env_id, **kwargs)
+    env.seed(seed)
+    env.action_space.seed(seed)
+    env.observation_space.seed(seed)
+    return env
+
+
 def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
     """
     When using DMC check if a manually specified time limit matches the trajectory duration the MP receives.
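_make() now dispatches on an optional framework prefix in the id instead of the previous try/except around gym.make: 'metaworld:<task>' goes to make_metaworld(), 'dmc:<domain>-<task>' to make_dmc(), and everything else (including the registered movement primitive ids) to make_gym(). A short usage sketch; the cartpole id is an illustrative dm_control suite task, not one listed in this diff:

    import alr_envs

    dmc_env = alr_envs.make('dmc:cartpole-balance', seed=10)        # dm_control suite backend
    gym_env = alr_envs.make('ReacherProMP-v2', seed=10)             # plain gym registry backend
    meta_env = alr_envs.make('metaworld:button-press-v2', seed=10)  # metaworld backend (task id illustrative)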
@@ -40,9 +40,9 @@ class TestMPEnvironments(unittest.TestCase):
         for i in range(iterations):
             observations.append(obs)
 
-            ac = env.action_space.sample()
+            actions = env.action_space.sample()
             # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
-            obs, reward, done, info = env.step(ac)
+            obs, reward, done, info = env.step(actions)
 
             self._verify_observations(obs, env.observation_space, "step()")
             self._verify_reward(reward)
@@ -55,13 +55,13 @@ class TestMPEnvironments(unittest.TestCase):
                 env.render("human")
 
             if done:
-                obs = env.reset()
+                break
 
-        assert done, "Done flag is not True after max episode length."
+        assert done, "Done flag is not True after end of episode."
         observations.append(obs)
         env.close()
         del env
-        return np.array(observations), np.array(rewards), np.array(dones)
+        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
 
     def _run_env_determinism(self, ids):
         seed = 0
@@ -70,8 +70,9 @@ class TestMPEnvironments(unittest.TestCase):
         traj1 = self._run_env(env_id, seed=seed)
         traj2 = self._run_env(env_id, seed=seed)
         for i, time_step in enumerate(zip(*traj1, *traj2)):
-            obs1, rwd1, done1, obs2, rwd2, done2 = time_step
-            self.assertTrue(np.allclose(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
+            obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+            self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+            self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
             self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
             self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
 
@@ -81,7 +82,7 @@ class TestMPEnvironments(unittest.TestCase):
                         f"not contained in observation space {observation_space}.")
 
     def _verify_reward(self, reward):
-        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
+        self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
 
     def _verify_done(self, done):
         self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@@ -113,12 +114,12 @@ class TestMPEnvironments(unittest.TestCase):
     def test_dmc_environment_functionality(self):
         """Tests that environments runs without errors using random actions for DMC MP envs."""
         with self.subTest(msg="DMP"):
-            for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']:
+            for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']:
                 with self.subTest(msg=env_id):
                     self._run_env(env_id)
 
         with self.subTest(msg="ProMP"):
-            for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']:
+            for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']:
                 with self.subTest(msg=env_id):
                     self._run_env(env_id)
 
@@ -151,9 +152,9 @@ class TestMPEnvironments(unittest.TestCase):
     def test_dmc_environment_determinism(self):
         """Tests that identical seeds produce identical trajectories for DMC MP Envs."""
         with self.subTest(msg="DMP"):
-            self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"])
+            self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"])
         with self.subTest(msg="ProMP"):
-            self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"])
+            self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])
 
     def test_metaworld_environment_determinism(self):
         """Tests that identical seeds produce identical trajectories for Metaworld MP Envs."""
@@ -7,8 +7,8 @@ from dm_control import suite, manipulation
 
 from alr_envs import make
 
-DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
-MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
+DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
+MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
 SEED = 1
 
 
@@ -29,9 +29,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
         Returns:
 
         """
+        print(env_id)
         env: gym.Env = make(env_id, seed=seed)
         rewards = []
         observations = []
+        actions = []
         dones = []
         obs = env.reset()
         self._verify_observations(obs, env.observation_space, "reset()")
@@ -43,6 +45,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
             observations.append(obs)
 
             ac = env.action_space.sample()
+            actions.append(ac)
             # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
             obs, reward, done, info = env.step(ac)
 
@@ -57,13 +60,13 @@ class TestStepDMCEnvironments(unittest.TestCase):
                 env.render("human")
 
             if done:
-                obs = env.reset()
+                break
 
-        assert done, "Done flag is not True after max episode length."
+        assert done, "Done flag is not True after end of episode."
         observations.append(obs)
         env.close()
         del env
-        return np.array(observations), np.array(rewards), np.array(dones)
+        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
 
     def _verify_observations(self, obs, observation_space, obs_type="reset()"):
         self.assertTrue(observation_space.contains(obs),
@@ -71,7 +74,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
                         f"not contained in observation space {observation_space}.")
 
     def _verify_reward(self, reward):
-        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
+        self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
 
     def _verify_done(self, done):
         self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@@ -91,8 +94,9 @@ class TestStepDMCEnvironments(unittest.TestCase):
         traj1 = self._run_env(env_id, seed=seed)
         traj2 = self._run_env(env_id, seed=seed)
         for i, time_step in enumerate(zip(*traj1, *traj2)):
-            obs1, rwd1, done1, obs2, rwd2, done2 = time_step
-            self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
+            obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+            self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+            self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
             self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
             self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
 
@@ -111,11 +115,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
         traj1 = self._run_env(env_id, seed=seed)
         traj2 = self._run_env(env_id, seed=seed)
         for i, time_step in enumerate(zip(*traj1, *traj2)):
-            obs1, rwd1, done1, obs2, rwd2, done2 = time_step
-            self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
+            obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+            self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+            self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
             self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
             self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
-            self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
 
 
 if __name__ == '__main__':
@@ -6,7 +6,7 @@ import numpy as np
 from alr_envs import make
 from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
 
-ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
+ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
 SEED = 1
 
 
@@ -57,9 +57,9 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
                 env.render("human")
 
             if done:
-                obs = env.reset()
+                break
 
-        assert done, "Done flag is not True after max episode length."
+        assert done, "Done flag is not True after end of episode."
         observations.append(obs)
         env.close()
         del env
@@ -71,7 +71,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
                         f"not contained in observation space {observation_space}.")
 
     def _verify_reward(self, reward):
-        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
+        self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
 
     def _verify_done(self, done):
         self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@@ -94,7 +94,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
             obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
             self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
             self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
-            self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+            self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
             self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
 
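All three test classes touched by this commit are plain unittest.TestCase suites, so they can be run with the standard unittest runner. A sketch assuming the modules live in a test package; the actual file paths are not shown in this diff view:

    import unittest

    # module path is an assumption, adjust to the repository layout
    from test.test_envs import TestMPEnvironments

    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestMPEnvironments)
    unittest.TextTestRunner(verbosity=2).run(suite)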