naming convention and running tests

Fabian 2022-07-11 16:18:18 +02:00
parent 786da2290d
commit ade83b5ae6
13 changed files with 621 additions and 590 deletions

View File

@ -113,7 +113,7 @@ print("OpenAI Gym MP tasks:")
print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
print("Deepmind Control MP tasks:")
print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS)
print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("MetaWorld MP tasks:")
print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS)
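A minimal sketch of listing the renamed DMC collection one entry per line, extending the README snippet above (assuming alr_envs is importable):

import alr_envs

# iterate the ProMP variants of the DMC-based tasks
for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"]:
    print(env_id)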

View File

@ -1,15 +1,14 @@
from alr_envs import dmc, meta, open_ai
from alr_envs.utils import make_dmc
from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
# Convenience function for all MP environments
from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
from .dmc import ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
ALL_MOTION_PRIMITIVE_ENVIRONMENTS = {
key: value + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key]
for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()}
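The comprehension above merges the per-suite lists key-wise, so the combined dictionary keeps the usual "DMP"/"ProMP" keys; a hedged sketch of inspecting it:

import alr_envs

# env_ids is the concatenation of ALR + DMC + Gym + MetaWorld ids per trajectory generator type
for mp_type, env_ids in alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
    print(mp_type, len(env_ids))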

View File

@ -1,4 +1,7 @@
from typing import Tuple, Union, Optional
import os
os.environ["MUJOCO_GL"] = "egl"
from typing import Tuple, Optional
import gym
import numpy as np
@ -67,7 +70,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
def observation(self, observation):
# return only the context part of the observation if requested
obs = observation[self.env.context_mask] if self.return_context_observation else observation
mask = self.env.context_mask
if self.is_time_aware:
mask = np.append(mask, False)
obs = observation[mask] if self.return_context_observation else observation
# cast dtype because metaworld returns an incorrect dtype that throws a gym error
return obs.astype(self.observation_space.dtype)
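A small numpy sketch of the new masking logic in isolation, assuming a time-aware environment whose last observation entry is the time step appended by TimeAwareObservation (values illustrative):

import numpy as np

observation = np.array([0.3, -1.2, 7.0])  # last entry: time step added by TimeAwareObservation
context_mask = np.array([True, False])    # provided by the task's MPWrapper
mask = np.append(context_mask, False)     # the time step is never exposed as context
print(observation[mask])                  # -> [0.3]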

View File

@ -2,7 +2,7 @@ from copy import deepcopy
from . import manipulation, suite
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
from gym.envs.registration import register
@ -47,10 +47,9 @@ DEFAULT_BB_DICT_DMP = {
}
}
# DeepMind Control Suite (DMC)
kwargs_dict_bic_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_bic_dmp['name'] = f"ball_in_cup-catch"
kwargs_dict_bic_dmp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_dmp['wrappers'].append(suite.ball_in_cup.MPWrapper)
kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1
@ -58,304 +57,313 @@ kwargs_dict_bic_dmp['controller_kwargs']['p_gains'] = 50
kwargs_dict_bic_dmp['controller_kwargs']['d_gains'] = 1
register(
id=f'dmc_ball_in_cup-catch_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"ball_in_cup-catch",
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.ball_in_cup.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 50,
"d_gains": 1
}
}
}
kwargs=kwargs_dict_bic_dmp
# {
# "name": f"ball_in_cup-catch",
# "time_limit": 20,
# "episode_length": 1000,
# "wrappers": [suite.ball_in_cup.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 2,
# "num_basis": 5,
# "duration": 20,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "motor",
# "goal_scale": 0.1,
# "policy_kwargs": {
# "p_gains": 50,
# "d_gains": 1
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
kwargs_dict_bic_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_bic_promp['name'] = f"ball_in_cup-catch"
kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
kwargs_dict_bic_promp['controller_kwargs']['p_gains'] = 50
kwargs_dict_bic_promp['controller_kwargs']['d_gains'] = 1
register(
id=f'dmc_ball_in_cup-catch_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"ball_in_cup-catch",
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.ball_in_cup.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
"policy_type": "motor",
"zero_start": True,
"policy_kwargs": {
"p_gains": 50,
"d_gains": 1
}
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bic_promp
# {
# "name": f"ball_in_cup-catch",
# "time_limit": 20,
# "episode_length": 1000,
# "wrappers": [suite.ball_in_cup.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 2,
# "num_basis": 5,
# "duration": 20,
# "policy_type": "motor",
# "zero_start": True,
# "policy_kwargs": {
# "p_gains": 50,
# "d_gains": 1
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
kwargs_dict_reacher_easy_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_easy_dmp['name'] = f"reacher-easy"
kwargs_dict_reacher_easy_dmp['name'] = f"dmc:reacher-easy"
kwargs_dict_reacher_easy_dmp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_reacher_easy_dmp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_easy_dmp['controller_kwargs']['d_gains'] = 1
register(
id=f'dmc_reacher-easy_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"reacher-easy",
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"weights_scale": 50,
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 50,
"d_gains": 1
}
}
}
kwargs=kwargs_dict_reacher_easy_dmp
# {
# "name": f"reacher-easy",
# "time_limit": 20,
# "episode_length": 1000,
# "wrappers": [suite.reacher.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 2,
# "num_basis": 5,
# "duration": 20,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "motor",
# "weights_scale": 50,
# "goal_scale": 0.1,
# "policy_kwargs": {
# "p_gains": 50,
# "d_gains": 1
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
kwargs_dict_reacher_easy_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_easy_promp['name'] = f"reacher-easy"
kwargs_dict_reacher_easy_promp['name'] = f"dmc:reacher-easy"
kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_easy_promp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_easy_promp['controller_kwargs']['d_gains'] = 1
kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=f'dmc_reacher-easy_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"reacher-easy",
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"policy_kwargs": {
"p_gains": 50,
"d_gains": 1
}
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_easy_promp
# {
# "name": f"reacher-easy",
# "time_limit": 20,
# "episode_length": 1000,
# "wrappers": [suite.reacher.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 2,
# "num_basis": 5,
# "duration": 20,
# "policy_type": "motor",
# "weights_scale": 0.2,
# "zero_start": True,
# "policy_kwargs": {
# "p_gains": 50,
# "d_gains": 1
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
kwargs_dict_reacher_hard_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_hard_dmp['name'] = f"reacher-hard"
kwargs_dict_reacher_hard_dmp['name'] = f"dmc:reacher-hard"
kwargs_dict_reacher_hard_dmp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_reacher_hard_dmp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_hard_dmp['controller_kwargs']['d_gains'] = 1
register(
id=f'dmc_reacher-hard_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"reacher-hard",
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"weights_scale": 50,
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 50,
"d_gains": 1
}
}
}
kwargs=kwargs_dict_reacher_hard_dmp
# {
# "name": f"reacher-hard",
# "time_limit": 20,
# "episode_length": 1000,
# "wrappers": [suite.reacher.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 2,
# "num_basis": 5,
# "duration": 20,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "motor",
# "weights_scale": 50,
# "goal_scale": 0.1,
# "policy_kwargs": {
# "p_gains": 50,
# "d_gains": 1
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
kwargs_dict_reacher_hard_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_hard_promp['name'] = f"reacher-hard"
kwargs_dict_reacher_hard_promp['name'] = f"dmc:reacher-hard"
kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_hard_promp['controller_kwargs']['p_gains'] = 50
kwargs_dict_reacher_hard_promp['controller_kwargs']['d_gains'] = 1
kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=f'dmc_reacher-hard_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"reacher-hard",
"time_limit": 20,
"episode_length": 1000,
"wrappers": [suite.reacher.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 20,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"policy_kwargs": {
"p_gains": 50,
"d_gains": 1
}
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_hard_promp
# {
# "name": f"reacher-hard",
# "time_limit": 20,
# "episode_length": 1000,
# "wrappers": [suite.reacher.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 2,
# "num_basis": 5,
# "duration": 20,
# "policy_type": "motor",
# "weights_scale": 0.2,
# "zero_start": True,
# "policy_kwargs": {
# "p_gains": 50,
# "d_gains": 1
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
_dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"]
for _task in _dmc_cartpole_tasks:
_env_id = f'dmc_cartpole-{_task}_dmp-v0'
kwargs_dict_cartpole_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole_dmp['name'] = f"cartpole-{_task}"
kwargs_dict_cartpole_dmp['camera_id'] = 0
kwargs_dict_cartpole_dmp['name'] = f"dmc:cartpole-{_task}"
kwargs_dict_cartpole_dmp['wrappers'].append(suite.cartpole.MPWrapper)
kwargs_dict_cartpole_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_cartpole_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole_dmp['trajectory_generator_kwargs'][
'weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"cartpole-{_task}",
# "time_limit": 1,
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"weights_scale": 50,
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10
}
}
}
kwargs=kwargs_dict_cartpole_dmp
# {
# "name": f"cartpole-{_task}",
# # "time_limit": 1,
# "camera_id": 0,
# "episode_length": 1000,
# "wrappers": [suite.cartpole.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 1,
# "num_basis": 5,
# "duration": 10,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "motor",
# "weights_scale": 50,
# "goal_scale": 0.1,
# "policy_kwargs": {
# "p_gains": 10,
# "d_gains": 10
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'dmc_cartpole-{_task}_promp-v0'
kwargs_dict_cartpole_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole_promp['name'] = f"cartpole-{_task}"
kwargs_dict_cartpole_promp['camera_id'] = 0
kwargs_dict_cartpole_promp['name'] = f"dmc:cartpole-{_task}"
kwargs_dict_cartpole_promp['wrappers'].append(suite.cartpole.MPWrapper)
kwargs_dict_cartpole_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole_promp['controller_kwargs']['d_gains'] = 10
kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"cartpole-{_task}",
# "time_limit": 1,
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10
}
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole_promp
# {
# "name": f"cartpole-{_task}",
# # "time_limit": 1,
# "camera_id": 0,
# "episode_length": 1000,
# "wrappers": [suite.cartpole.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 1,
# "num_basis": 5,
# "duration": 10,
# "policy_type": "motor",
# "weights_scale": 0.2,
# "zero_start": True,
# "policy_kwargs": {
# "p_gains": 10,
# "d_gains": 10
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
kwargs_dict_cartpole2poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole2poles_dmp['name'] = f"cartpole-two_poles"
kwargs_dict_cartpole2poles_dmp['camera_id'] = 0
kwargs_dict_cartpole2poles_dmp['name'] = f"dmc:cartpole-two_poles"
kwargs_dict_cartpole2poles_dmp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
kwargs_dict_cartpole2poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole2poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-two_poles_dmp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"cartpole-two_poles",
# "time_limit": 1,
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.TwoPolesMPWrapper],
"traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"weights_scale": 50,
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10
}
}
}
kwargs=kwargs_dict_cartpole2poles_dmp
# {
# "name": f"cartpole-two_poles",
# # "time_limit": 1,
# "camera_id": 0,
# "episode_length": 1000,
# "wrappers": [suite.cartpole.TwoPolesMPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 1,
# "num_basis": 5,
# "duration": 10,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "motor",
# "weights_scale": 50,
# "goal_scale": 0.1,
# "policy_kwargs": {
# "p_gains": 10,
# "d_gains": 10
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
kwargs_dict_cartpole2poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole2poles_promp['name'] = f"cartpole-two_poles"
kwargs_dict_cartpole2poles_promp['camera_id'] = 0
kwargs_dict_cartpole2poles_promp['name'] = f"dmc:cartpole-two_poles"
kwargs_dict_cartpole2poles_promp['wrappers'].append(suite.cartpole.TwoPolesMPWrapper)
kwargs_dict_cartpole2poles_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole2poles_promp['controller_kwargs']['d_gains'] = 10
@ -363,70 +371,71 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
_env_id = f'dmc_cartpole-two_poles_promp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"cartpole-two_poles",
# "time_limit": 1,
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.TwoPolesMPWrapper],
"traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10
}
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole2poles_promp
# {
# "name": f"cartpole-two_poles",
# # "time_limit": 1,
# "camera_id": 0,
# "episode_length": 1000,
# "wrappers": [suite.cartpole.TwoPolesMPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 1,
# "num_basis": 5,
# "duration": 10,
# "policy_type": "motor",
# "weights_scale": 0.2,
# "zero_start": True,
# "policy_kwargs": {
# "p_gains": 10,
# "d_gains": 10
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
kwargs_dict_cartpole3poles_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole3poles_dmp['name'] = f"cartpole-three_poles"
kwargs_dict_cartpole3poles_dmp['camera_id'] = 0
kwargs_dict_cartpole3poles_dmp['name'] = f"dmc:cartpole-three_poles"
kwargs_dict_cartpole3poles_dmp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
kwargs_dict_cartpole3poles_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_cartpole3poles_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-three_poles_dmp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"cartpole-three_poles",
# "time_limit": 1,
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.ThreePolesMPWrapper],
"traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"weights_scale": 50,
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10
}
}
}
kwargs=kwargs_dict_cartpole3poles_dmp
# {
# "name": f"cartpole-three_poles",
# # "time_limit": 1,
# "camera_id": 0,
# "episode_length": 1000,
# "wrappers": [suite.cartpole.ThreePolesMPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 1,
# "num_basis": 5,
# "duration": 10,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "motor",
# "weights_scale": 50,
# "goal_scale": 0.1,
# "policy_kwargs": {
# "p_gains": 10,
# "d_gains": 10
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
kwargs_dict_cartpole3poles_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_cartpole3poles_promp['name'] = f"cartpole-three_poles"
kwargs_dict_cartpole3poles_promp['camera_id'] = 0
kwargs_dict_cartpole3poles_promp['name'] = f"dmc:cartpole-three_poles"
kwargs_dict_cartpole3poles_promp['wrappers'].append(suite.cartpole.ThreePolesMPWrapper)
kwargs_dict_cartpole3poles_promp['controller_kwargs']['p_gains'] = 10
kwargs_dict_cartpole3poles_promp['controller_kwargs']['d_gains'] = 10
@ -434,81 +443,85 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
_env_id = f'dmc_cartpole-three_poles_promp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"cartpole-three_poles",
# "time_limit": 1,
"camera_id": 0,
"episode_length": 1000,
"wrappers": [suite.cartpole.ThreePolesMPWrapper],
"traj_gen_kwargs": {
"num_dof": 1,
"num_basis": 5,
"duration": 10,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10
}
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole3poles_promp
# {
# "name": f"cartpole-three_poles",
# # "time_limit": 1,
# "camera_id": 0,
# "episode_length": 1000,
# "wrappers": [suite.cartpole.ThreePolesMPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 1,
# "num_basis": 5,
# "duration": 10,
# "policy_type": "motor",
# "weights_scale": 0.2,
# "zero_start": True,
# "policy_kwargs": {
# "p_gains": 10,
# "d_gains": 10
# }
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# DeepMind Manipulation
kwargs_dict_mani_reach_site_features_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_mani_reach_site_features_dmp['name'] = f"manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_dmp['name'] = f"dmc:manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_dmp['wrappers'].append(manipulation.reach_site.MPWrapper)
kwargs_dict_mani_reach_site_features_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500 # TODO: weight scale 50, but goal scale 0.1
# TODO: weight scale 50, but goal scale 0.1
kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
register(
id=f'dmc_manipulation-reach_site_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs={
"name": f"manipulation-reach_site_features",
# "time_limit": 1,
"episode_length": 250,
"wrappers": [manipulation.reach_site.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 9,
"num_basis": 5,
"duration": 10,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "velocity",
"weights_scale": 50,
"goal_scale": 0.1,
}
}
kwargs=kwargs_dict_mani_reach_site_features_dmp
# {
# "name": f"manipulation-reach_site_features",
# # "time_limit": 1,
# "episode_length": 250,
# "wrappers": [manipulation.reach_site.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 9,
# "num_basis": 5,
# "duration": 10,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "velocity",
# "weights_scale": 50,
# "goal_scale": 0.1,
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
kwargs_dict_mani_reach_site_features_promp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_mani_reach_site_features_promp['name'] = f"manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_promp['name'] = f"dmc:manipulation-reach_site_features"
kwargs_dict_mani_reach_site_features_promp['wrappers'].append(manipulation.reach_site.MPWrapper)
kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
register(
id=f'dmc_manipulation-reach_site_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"manipulation-reach_site_features",
# "time_limit": 1,
"episode_length": 250,
"wrappers": [manipulation.reach_site.MPWrapper],
"traj_gen_kwargs": {
"num_dof": 9,
"num_basis": 5,
"duration": 10,
"policy_type": "velocity",
"weights_scale": 0.2,
"zero_start": True,
}
}
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_mani_reach_site_features_promp
# {
# "name": f"manipulation-reach_site_features",
# # "time_limit": 1,
# "episode_length": 250,
# "wrappers": [manipulation.reach_site.MPWrapper],
# "traj_gen_kwargs": {
# "num_dof": 9,
# "num_basis": 5,
# "duration": 10,
# "policy_type": "velocity",
# "weights_scale": 0.2,
# "zero_start": True,
# }
# }
)
ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")

View File

@ -2,17 +2,22 @@
# License: MIT
# Copyright (c) 2020 Denis Yarats
import collections
from typing import Any, Dict, Tuple
from collections.abc import MutableMapping
from typing import Any, Dict, Tuple, Optional, Union, Callable
from dm_control import composer
import gym
import numpy as np
from dm_control import manipulation, suite
from dm_control.rl import control
from dm_env import specs
from gym import core, spaces
from gym import spaces
from gym.core import ObsType
def _spec_to_box(spec):
def extract_min_max(s):
assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
assert s.dtype == np.float64 or s.dtype == np.float32, \
f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
dim = int(np.prod(s.shape))
if type(s) == specs.Array:
bound = np.inf * np.ones(dim, dtype=s.dtype)
@ -32,7 +37,7 @@ def _spec_to_box(spec):
return spaces.Box(low, high, dtype=s.dtype)
def _flatten_obs(obs: collections.MutableMapping):
def _flatten_obs(obs: MutableMapping):
"""
Flattens an observation of type MutableMapping, e.g. a dict to a 1D array.
Args:
@ -42,7 +47,7 @@ def _flatten_obs(obs: collections.MutableMapping):
"""
if not isinstance(obs, collections.MutableMapping):
if not isinstance(obs, MutableMapping):
raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
# Keep key order consistent for non-OrderedDicts
@ -52,47 +57,19 @@ def _flatten_obs(obs: collections.MutableMapping):
return np.concatenate(obs_vals)
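# In effect, an observation such as OrderedDict(position=array([0.1, 0.2]), velocity=array([0.0]))
# ends up as the flat array [0.1, 0.2, 0.0] (example values purely illustrative); non-OrderedDict
# inputs are concatenated in a consistent key order as noted above.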
class DMCWrapper(core.Env):
def __init__(
self,
domain_name: str,
task_name: str,
task_kwargs: dict = {},
visualize_reward: bool = True,
from_pixels: bool = False,
height: int = 84,
width: int = 84,
camera_id: int = 0,
frame_skip: int = 1,
environment_kwargs: dict = None,
channels_first: bool = True
):
assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.'
self._from_pixels = from_pixels
self._height = height
self._width = width
self._camera_id = camera_id
self._frame_skip = frame_skip
self._channels_first = channels_first
class DMCWrapper(gym.Env):
def __init__(self,
env: Callable[[], Union[composer.Environment, control.Environment]],
):
# create task
if domain_name == "manipulation":
assert not from_pixels and not task_name.endswith("_vision"), \
"TODO: Vision interface for manipulation is different to suite and needs to be implemented"
self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random'])
else:
self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
visualize_reward=visualize_reward, environment_kwargs=environment_kwargs)
# TODO: Currently this is required to be a function because dmc does not allow copying composer environments
self._env = env()
# action and observation space
self._action_space = _spec_to_box([self._env.action_spec()])
self._observation_space = _spec_to_box(self._env.observation_spec().values())
self._last_state = None
self.viewer = None
# set seed
self.seed(seed=task_kwargs.get('random', 1))
self._window = None
def __getattr__(self, item):
"""Propagate only non-existent properties to wrapped env."""
@ -103,17 +80,7 @@ class DMCWrapper(core.Env):
return getattr(self._env, item)
def _get_obs(self, time_step):
if self._from_pixels:
obs = self.render(
mode="rgb_array",
height=self._height,
width=self._width,
camera_id=self._camera_id
)
if self._channels_first:
obs = obs.transpose(2, 0, 1).copy()
else:
obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
obs = _flatten_obs(time_step.observation).astype(self.observation_space.dtype)
return obs
@property
@ -126,20 +93,7 @@ class DMCWrapper(core.Env):
@property
def dt(self):
return self._env.control_timestep() * self._frame_skip
@property
def base_step_limit(self):
"""
Returns: max_episode_steps of the underlying DMC env
"""
# Accessing private attribute because DMC does not expose time_limit or step_limit.
# Only the current time_step/time as well as the control_timestep can be accessed.
try:
return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip
except AttributeError as e:
return self._env._time_limit / self.dt
return self._env.control_timestep()
def seed(self, seed=None):
self._action_space.seed(seed)
@ -147,56 +101,71 @@ class DMCWrapper(core.Env):
def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]:
assert self._action_space.contains(action)
reward = 0
extra = {'internal_state': self._env.physics.get_state().copy()}
for _ in range(self._frame_skip):
time_step = self._env.step(action)
reward += time_step.reward or 0.
done = time_step.last()
if done:
break
self._last_state = _flatten_obs(time_step.observation)
time_step = self._env.step(action)
reward = time_step.reward or 0.
done = time_step.last()
obs = self._get_obs(time_step)
extra['discount'] = time_step.discount
return obs, reward, done, extra
def reset(self) -> np.ndarray:
def reset(self, *, seed: Optional[int] = None, return_info: bool = False,
options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
time_step = self._env.reset()
self._last_state = _flatten_obs(time_step.observation)
obs = self._get_obs(time_step)
return obs
def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
if self._last_state is None:
raise ValueError('Environment not ready to render. Call reset() first.')
camera_id = camera_id or self._camera_id
def render(self, mode='rgb_array', height=240, width=320, camera_id=-1, overlays=(), depth=False,
segmentation=False, scene_option=None, render_flag_overrides=None):
# assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
if mode == "rgb_array":
height = height or self._height
width = width or self._width
return self._env.physics.render(height=height, width=width, camera_id=camera_id)
return self._env.physics.render(height=height, width=width, camera_id=camera_id, overlays=overlays,
depth=depth, segmentation=segmentation, scene_option=scene_option,
render_flag_overrides=render_flag_overrides)
elif mode == 'human':
if self.viewer is None:
# pylint: disable=import-outside-toplevel
# pylint: disable=g-import-not-at-top
from gym.envs.classic_control import rendering
self.viewer = rendering.SimpleImageViewer()
# Render max available buffer size. Larger is only possible by altering the XML.
img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
width=self._env.physics.model.vis.global_.offwidth,
camera_id=camera_id)
self.viewer.imshow(img)
return self.viewer.isopen
# Render max available buffer size. Larger is only possible by altering the XML.
img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight,
width=self._env.physics.model.vis.global_.offwidth,
camera_id=camera_id, overlays=overlays, depth=depth, segmentation=segmentation,
scene_option=scene_option, render_flag_overrides=render_flag_overrides)
if depth:
img = np.dstack([img.astype(np.uint8)] * 3)
if mode == 'human':
try:
import cv2
if self._window is None:
self._window = cv2.namedWindow(self.id, cv2.WINDOW_AUTOSIZE)
cv2.imshow(self.id, img[..., ::-1]) # Image in BGR
cv2.waitKey(1)
except ImportError:
import pygame
img = img.transpose((1, 0, 2))
if self._window is None:
pygame.init()
pygame.display.init()
self._window = pygame.display.set_mode(img.shape[:2])
self._window.blit(pygame.surfarray.make_surface(img), (0, 0))
pygame.event.pump()
pygame.display.flip()
def close(self):
super().close()
if self.viewer is not None and self.viewer.isopen:
self.viewer.close()
if self._window is not None:
try:
import cv2
cv2.destroyWindow(self.id)
except ImportError:
import pygame
pygame.display.quit()
pygame.quit()
@property
def reward_range(self) -> Tuple[float, float]:
@ -204,3 +173,8 @@ class DMCWrapper(core.Env):
if isinstance(reward_spec, specs.BoundedArray):
return reward_spec.minimum, reward_spec.maximum
return -float('inf'), float('inf')
@property
def metadata(self):
return {'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': round(1.0 / self._env.control_timestep())}
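A hedged sketch of constructing the reworked wrapper directly, matching the callable-based constructor above (task and seed illustrative):

from dm_control import suite

from alr_envs.dmc.dmc_wrapper import DMCWrapper

# the dm_control env is wrapped in a zero-argument callable because composer environments cannot be copied
env = DMCWrapper(lambda: suite.load(domain_name="reacher", task_name="easy", task_kwargs={"random": 1}))
obs = env.reset()
img = env.render(mode="rgb_array", height=240, width=320)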

View File

@ -1,3 +1,5 @@
import numpy as np
import alr_envs
@ -59,7 +61,8 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
"""
# Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
# E.g. here for way to many basis functions
env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
# env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
env = alr_envs.make(env_name, seed)
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
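# The same options can also be passed straight through make, since user kwargs are merged into the
# registered defaults via nested_update; a hedged sketch mirroring the hints above:
# env = alr_envs.make(env_name, seed, black_box_kwargs={'learn_sub_trajectories': True})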
@ -72,15 +75,16 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
ac = env.action_space.sample() * 1000
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
rewards += reward
if done:
print(rewards)
print(i, rewards)
rewards = 0
obs = env.reset()
print(obs)
return obs
def example_fully_custom_mp(seed=1, iterations=1, render=True):
@ -139,7 +143,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
if __name__ == '__main__':
render = True
render = False
# # DMP
# example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
#
@ -150,7 +154,7 @@ if __name__ == '__main__':
# example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)
# Altered basis functions
example_custom_mp("HopperJumpSparseProMP-v0", seed=10, iterations=10, render=render)
obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
# Custom MP
# example_fully_custom_mp(seed=10, iterations=1, render=render)

View File

@ -36,7 +36,7 @@ for _task in _goal_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_goal_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_goal_change_promp['wrappers'].append(goal_change_mp_wrapper.MPWrapper)
kwargs_dict_goal_change_promp['name'] = _task
kwargs_dict_goal_change_promp['name'] = f'metaworld:{_task}'
register(
id=_env_id,
@ -52,7 +52,7 @@ for _task in _object_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_object_change_promp['wrappers'].append(object_change_mp_wrapper.MPWrapper)
kwargs_dict_object_change_promp['name'] = _task
kwargs_dict_object_change_promp['name'] = f'metaworld:{_task}'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
@ -77,7 +77,7 @@ for _task in _goal_and_object_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_goal_and_object_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_goal_and_object_change_promp['wrappers'].append(goal_object_change_mp_wrapper.MPWrapper)
kwargs_dict_goal_and_object_change_promp['name'] = _task
kwargs_dict_goal_and_object_change_promp['name'] = f'metaworld:{_task}'
register(
id=_env_id,
@ -93,7 +93,7 @@ for _task in _goal_and_endeffector_change_envs:
_env_id = f'{name}ProMP-{task_id_split[-1]}'
kwargs_dict_goal_and_endeffector_change_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_goal_and_endeffector_change_promp['wrappers'].append(goal_endeffector_change_mp_wrapper.MPWrapper)
kwargs_dict_goal_and_endeffector_change_promp['name'] = _task
kwargs_dict_goal_and_endeffector_change_promp['name'] = f'metaworld:{_task}'
register(
id=_env_id,

View File

@ -27,7 +27,6 @@ DEFAULT_BB_DICT_ProMP = {
}
}
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_reacher_promp['controller_kwargs']['p_gains'] = 0.6
kwargs_dict_reacher_promp['controller_kwargs']['d_gains'] = 0.075
@ -35,7 +34,7 @@ kwargs_dict_reacher_promp['basis_generator_kwargs']['num_basis'] = 6
kwargs_dict_reacher_promp['name'] = "Reacher-v2"
kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher_v2.MPWrapper)
register(
id='Reacher2dProMP-v2',
id='ReacherProMP-v2',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_promp
)

View File

@ -1,65 +1 @@
import re
from typing import Union
import gym
from gym.envs.registration import register
from alr_envs.utils.make_env_helpers import make
def make_dmc(
id: str,
seed: int = 1,
visualize_reward: bool = True,
from_pixels: bool = False,
height: int = 84,
width: int = 84,
camera_id: int = 0,
frame_skip: int = 1,
episode_length: Union[None, int] = None,
environment_kwargs: dict = {},
time_limit: Union[None, float] = None,
channels_first: bool = True
):
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
# License: MIT
# Copyright (c) 2020 Denis Yarats
if not re.match(r"\w+-\w+", id):
raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
domain_name, task_name = id.split("-")
env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
if from_pixels:
assert not visualize_reward, 'Cannot use visualize reward when learning from pixels.'
# Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
episode_length = episode_length or (250 if domain_name == "manipulation" else 1000)
max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
if env_id not in gym.envs.registry.env_specs:
task_kwargs = {'random': seed}
# if seed is not None:
# task_kwargs['random'] = seed
if time_limit is not None:
task_kwargs['time_limit'] = time_limit
register(
id=env_id,
entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
kwargs=dict(
domain_name=domain_name,
task_name=task_name,
task_kwargs=task_kwargs,
environment_kwargs=environment_kwargs,
visualize_reward=visualize_reward,
from_pixels=from_pixels,
height=height,
width=width,
camera_id=camera_id,
frame_skip=frame_skip,
channels_first=channels_first,
),
max_episode_steps=max_episode_steps,
)
return gym.make(env_id)

View File

@ -1,20 +1,41 @@
import warnings
import re
import uuid
from collections.abc import MutableMapping
from copy import deepcopy
from typing import Iterable, Type, Union, MutableMapping
from math import ceil
from typing import Iterable, Type, Union
import gym
import numpy as np
from gym.envs.registration import EnvSpec, registry
import alr_envs
try:
from dm_control import suite, manipulation, composer
from dm_control.rl import control
except ImportError:
pass
try:
import metaworld
except Exception:
# catch Exception due to Mujoco-py
pass
from gym.envs.registration import registry
from gym.envs.registration import register
from gym.wrappers import TimeAwareObservation
from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
from alr_envs.black_box.factory.controller_factory import get_controller
from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
from alr_envs.black_box.factory.controller_factory import get_controller
from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import nested_update
ALL_FRAMEWORK_TYPES = ['meta', 'dmc', 'gym']
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
"""
@ -70,57 +91,25 @@ def _make(env_id: str, seed, **kwargs):
# env_id.split(':')
# if 'dmc' :
try:
# This access is required to allow for nested dict updates for BB envs
spec = registry.get(env_id)
all_kwargs = deepcopy(spec.kwargs)
nested_update(all_kwargs, kwargs)
kwargs = all_kwargs
# Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
if env_id.startswith("dmc"):
kwargs.update({"seed": seed})
# Gym
env = gym.make(env_id, **kwargs)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
except (gym.error.Error, AttributeError):
if ':' in env_id:
split_id = env_id.split(':')
framework, env_id = split_id[-2:]
else:
framework = None
if framework == 'metaworld':
# MetaWorld env
import metaworld
if env_id in metaworld.ML1.ENV_NAMES:
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
env._freeze_rand_vec = False
env.seeded_rand_vec = True
# Manually set spec, as metaworld environments are not registered via gym
env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
# env.seed(seed)
# env.action_space.seed(seed)
# env.observation_space.seed(seed)
# env.goal_space.seed(seed)
else:
# DMC
from alr_envs import make_dmc
env = make_dmc(env_id, seed=seed, **kwargs)
if not env.base_step_limit == env.spec.max_episode_steps:
raise ValueError(f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym "
f"is different from the DMC environment specification of {env.base_step_limit} steps.")
env = make_metaworld(env_id, seed=seed, **kwargs)
elif framework == 'dmc':
# DeepMind Control
env = make_dmc(env_id, seed=seed, **kwargs)
else:
env = make_gym(env_id, seed=seed, **kwargs)
return env
def _make_wrapped_env(
env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs
):
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
"""
Helper function for creating a wrapped gym environment using MPs.
It adds all provided wrappers to the specified environment and verifies at least one RawInterfaceWrapper is
@ -149,7 +138,7 @@ def _make_wrapped_env(
def make_bb(
env_id: str, wrappers: Iterable, black_box_kwargs: MutableMapping, traj_gen_kwargs: MutableMapping,
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed: int = 1,
**kwargs):
"""
This can also be used standalone for manually building a custom DMP environment.
@ -167,7 +156,6 @@ def make_bb(
"""
_verify_time_limit(traj_gen_kwargs.get("duration", None), kwargs.get("time_limit", None))
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
learn_sub_trajs = black_box_kwargs.get('learn_sub_trajectories')
do_replanning = black_box_kwargs.get('replanning_schedule')
@ -176,12 +164,16 @@ def make_bb(
if learn_sub_trajs or do_replanning:
# add time_step observation when replanning
kwargs['wrappers'].append(TimeAwareObservation)
if not any(issubclass(w, TimeAwareObservation) for w in kwargs['wrappers']):
# Add as first wrapper in order to alter observation
kwargs['wrappers'].insert(0, TimeAwareObservation)
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(_env.action_space.shape).item())
env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs)
traj_gen_kwargs['action_dim'] = traj_gen_kwargs.get('action_dim', np.prod(env.action_space.shape).item())
if black_box_kwargs.get('duration') is None:
black_box_kwargs['duration'] = _env.spec.max_episode_steps * _env.dt
black_box_kwargs['duration'] = env.spec.max_episode_steps * env.dt
if phase_kwargs.get('tau') is None:
phase_kwargs['tau'] = black_box_kwargs['duration']
@ -194,7 +186,7 @@ def make_bb(
controller = get_controller(**controller_kwargs)
traj_gen = get_trajectory_generator(basis_generator=basis_gen, **traj_gen_kwargs)
bb_env = BlackBoxWrapper(_env, trajectory_generator=traj_gen, tracking_controller=controller,
bb_env = BlackBoxWrapper(env, trajectory_generator=traj_gen, tracking_controller=controller,
**black_box_kwargs)
return bb_env
@ -249,6 +241,109 @@ def make_bb_env_helper(**kwargs):
basis_kwargs=basis_kwargs, **kwargs, seed=seed)
def make_dmc(
env_id: Union[str, composer.Environment, control.Environment],
seed: int = None,
visualize_reward: bool = True,
time_limit: Union[None, float] = None,
**kwargs
):
if not re.match(r"\w+-\w+", env_id):
raise ValueError("env_id does not have the following structure: 'domain_name-task_name'")
domain_name, task_name = env_id.split("-")
if task_name.endswith("_vision"):
# TODO
raise ValueError("The vision interface for manipulation tasks is currently not supported.")
if (domain_name, task_name) not in suite.ALL_TASKS and task_name not in manipulation.ALL:
raise ValueError(f'Specified domain "{domain_name}" and task "{task_name}" combination does not exist.')
# env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
gym_id = uuid.uuid4().hex + '-v1'
task_kwargs = {'random': seed}
if time_limit is not None:
task_kwargs['time_limit'] = time_limit
# create task
# Accessing private attribute because DMC does not expose time_limit or step_limit.
# Only the current time_step/time as well as the control_timestep can be accessed.
if domain_name == "manipulation":
env = manipulation.load(environment_name=task_name, seed=seed)
max_episode_steps = ceil(env._time_limit / env.control_timestep())
else:
env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs,
visualize_reward=visualize_reward, environment_kwargs=kwargs)
max_episode_steps = int(env._step_limit)
register(
id=gym_id,
entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper',
kwargs={'env': lambda: env},
max_episode_steps=max_episode_steps,
)
env = gym.make(gym_id)
env.seed(seed=seed)
return env
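# A hedged usage sketch (any 'dmc:' prefix has already been stripped by _make at this point):
#   env = make_dmc("ball_in_cup-catch", seed=1)
#   obs = env.reset()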
def make_metaworld(env_id, seed, **kwargs):
if env_id not in metaworld.ML1.ENV_NAMES:
raise ValueError(f'Specified environment "{env_id}" not present in metaworld ML1.')
_env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
_env._freeze_rand_vec = False
# New argument to use global seeding
_env.seeded_rand_vec = True
# Manually set spec, as metaworld environments are not registered via gym
# _env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
# _env = gym.wrappers.TimeLimit(_env, max_episode_steps=_env.max_path_length)
# _env.seed(seed)
# _env.action_space.seed(seed)
# _env.observation_space.seed(seed)
# _env.goal_space.seed(seed)
gym_id = uuid.uuid4().hex + '-v1'
register(
id=gym_id,
entry_point=lambda: _env,
max_episode_steps=_env.max_path_length,
)
# TODO: enable checker once the incorrect dtype of obs and observation space is fixed by metaworld
env = gym.make(gym_id, disable_env_checker=True)
env.seed(seed=seed)
return env
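# A hedged usage sketch (task name illustrative, assuming it is listed in metaworld.ML1.ENV_NAMES):
#   env = make_metaworld("reach-v2", seed=1)
# With the new naming convention the same task is reached via alr_envs.make("metaworld:reach-v2", seed=1),
# which strips the 'metaworld:' prefix in _make before dispatching here.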
def make_gym(env_id, seed, **kwargs):
# This access is required to allow for nested dict updates for BB envs
spec = registry.get(env_id)
all_kwargs = deepcopy(spec.kwargs)
nested_update(all_kwargs, kwargs)
kwargs = all_kwargs
# Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
# if env_id.startswith("dmc") or any(s in env_id.lower() for s in ['promp', 'dmp', 'prodmp']):
all_bb_envs = sum(alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values(), [])
if env_id.startswith("dmc") or env_id in all_bb_envs:
kwargs.update({"seed": seed})
# Gym
env = gym.make(env_id, **kwargs)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
return env
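# Because user kwargs are merged into the registered defaults via nested_update, nested options of
# registered black-box envs can be overridden at make time, e.g. (values illustrative):
#   alr_envs.make("Reacher5dProMP-v0", seed=1, basis_generator_kwargs={"num_basis": 3})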
def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
"""
When using DMC check if a manually specified time limit matches the trajectory duration the MP receives.

View File

@ -40,9 +40,9 @@ class TestMPEnvironments(unittest.TestCase):
for i in range(iterations):
observations.append(obs)
ac = env.action_space.sample()
actions = env.action_space.sample()
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
obs, reward, done, info = env.step(actions)
self._verify_observations(obs, env.observation_space, "step()")
self._verify_reward(reward)
@ -55,13 +55,13 @@ class TestMPEnvironments(unittest.TestCase):
env.render("human")
if done:
obs = env.reset()
break
assert done, "Done flag is not True after max episode length."
assert done, "Done flag is not True after end of episode."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones)
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
def _run_env_determinism(self, ids):
seed = 0
@ -70,8 +70,9 @@ class TestMPEnvironments(unittest.TestCase):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.allclose(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
@ -81,7 +82,7 @@ class TestMPEnvironments(unittest.TestCase):
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@ -113,12 +114,12 @@ class TestMPEnvironments(unittest.TestCase):
def test_dmc_environment_functionality(self):
"""Tests that environments runs without errors using random actions for DMC MP envs."""
with self.subTest(msg="DMP"):
for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']:
for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']:
with self.subTest(msg=env_id):
self._run_env(env_id)
with self.subTest(msg="ProMP"):
for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']:
for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']:
with self.subTest(msg=env_id):
self._run_env(env_id)
@ -151,9 +152,9 @@ class TestMPEnvironments(unittest.TestCase):
def test_dmc_environment_determinism(self):
"""Tests that identical seeds produce identical trajectories for DMC MP Envs."""
with self.subTest(msg="DMP"):
self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"])
self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"])
with self.subTest(msg="ProMP"):
self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"])
self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])
def test_metaworld_environment_determinism(self):
"""Tests that identical seeds produce identical trajectories for Metaworld MP Envs."""

View File

@ -7,8 +7,8 @@ from dm_control import suite, manipulation
from alr_envs import make
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
SEED = 1
@ -29,9 +29,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
Returns:
"""
print(env_id)
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
actions = []
dones = []
obs = env.reset()
self._verify_observations(obs, env.observation_space, "reset()")
@ -43,6 +45,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
observations.append(obs)
ac = env.action_space.sample()
actions.append(ac)
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
@ -57,13 +60,13 @@ class TestStepDMCEnvironments(unittest.TestCase):
env.render("human")
if done:
obs = env.reset()
break
assert done, "Done flag is not True after max episode length."
assert done, "Done flag is not True after end of episode."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones)
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
self.assertTrue(observation_space.contains(obs),
@ -71,7 +74,7 @@ class TestStepDMCEnvironments(unittest.TestCase):
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@ -91,8 +94,9 @@ class TestStepDMCEnvironments(unittest.TestCase):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
@ -111,11 +115,11 @@ class TestStepDMCEnvironments(unittest.TestCase):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
if __name__ == '__main__':

View File

@ -6,7 +6,7 @@ import numpy as np
from alr_envs import make
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
SEED = 1
@ -57,9 +57,9 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
env.render("human")
if done:
obs = env.reset()
break
assert done, "Done flag is not True after max episode length."
assert done, "Done flag is not True after end of episode."
observations.append(obs)
env.close()
del env
@ -71,7 +71,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
@ -94,7 +94,7 @@ class TestStepMetaWorlEnvironments(unittest.TestCase):
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")