2021-11-30 16:11:32 +01:00
|
|
|
import warnings
|
2022-05-03 19:51:54 +02:00
|
|
|
from typing import Iterable, Type, Union, Mapping, MutableMapping
|
2021-06-25 15:51:06 +02:00
|
|
|
|
2021-04-21 10:45:34 +02:00
|
|
|
import gym
|
2021-07-30 11:59:02 +02:00
|
|
|
import numpy as np
|
2021-08-19 09:30:54 +02:00
|
|
|
from gym.envs.registration import EnvSpec
|
2021-04-21 10:45:34 +02:00
|
|
|
|
2021-06-25 15:51:06 +02:00
|
|
|
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
|
2021-11-15 09:10:03 +01:00
|
|
|
from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper
|
2022-05-03 19:51:54 +02:00
|
|
|
from mp_pytorch import MPInterface
|
|
|
|
|
|
|
|
from alr_envs.mp.basis_generator_factory import get_basis_generator
|
|
|
|
from alr_envs.mp.controllers.base_controller import BaseController
|
|
|
|
from alr_envs.mp.controllers.controller_factory import get_controller
|
|
|
|
from alr_envs.mp.mp_factory import get_movement_primitive
|
|
|
|
from alr_envs.mp.episodic_wrapper import EpisodicWrapper
|
|
|
|
from alr_envs.mp.phase_generator_factory import get_phase_generator
|
2021-06-25 15:51:06 +02:00
|
|
|
|
|
|
|
|
2021-08-19 09:30:54 +02:00
|
|
|
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
    """
    TODO: Do we need this?
    Create a seeded gym environment, or a zero-argument factory for it.

    The environment is built via `make` with the seed `seed + rank`, which is handy for vector environments:
    [make_rank("my_env_name-v0", 123, i) for i in range(8)] yields 8 factories using the seeds 123 through 130.
    Testing environments should therefore be seeded with an offset of at least the number of training
    environments, e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments.

    Args:
        env_id: name of the environment
        seed: base seed for deterministic behaviour
        rank: offset that is added to `seed` for this particular environment instance
        return_callable: If True returns a callable to create the environment instead of the environment itself.
        **kwargs: forwarded unchanged to `make`

    Returns: the factory if `return_callable` is truthy, otherwise the environment the factory creates

    """

    def _build_env():
        # The seed offset is computed lazily, i.e. only once the environment is actually requested.
        return make(env_id, seed + rank, **kwargs)

    if return_callable:
        return _build_env
    return _build_env()
|
2021-04-21 10:45:34 +02:00
|
|
|
|
|
|
|
|
2021-08-19 09:30:54 +02:00
|
|
|
def make(env_id: str, seed, **kwargs):
    """
    Converts an env_id to an environment with the gym API.
    This also works for DeepMind Control Suite interface_wrappers
    for which domain name and task name are expected to be separated by "-".

    The id is resolved in this order:
        1. `gym.make(env_id, **kwargs)`
        2. if that raises `gym.error.Error` and the id is listed in `metaworld.ML1.ENV_NAMES`:
           the goal observable MetaWorld environment, wrapped in a `TimeLimit`
        3. otherwise: `alr_envs.make_dmc`

    Ids containing "DetPMP"/"detpmp" are deprecated; they are rewritten to "ProMP"/"promp" after a warning.

    Args:
        env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
        seed: seed applied to the environment and to its action/observation (and MetaWorld goal) spaces
        **kwargs: Additional kwargs for the constructor such as pixel observations, etc.

    Returns: Gym environment

    Raises:
        AssertionError: if the step limit of a DMC environment differs from `spec.max_episode_steps`.

    """
    if any(tag in env_id for tag in ("DetPMP", "detpmp")):
        warnings.warn("DetPMP is deprecated and converted to ProMP")
        env_id = env_id.replace("DetPMP", "ProMP")
        env_id = env_id.replace("detpmp", "promp")

    try:
        # Add seed to the constructor arguments in case it is a predefined gym+dmc hybrid environment.
        # This uses a separate dict on purpose: the fallbacks below pass `seed=seed` together with `**kwargs`,
        # so writing the seed into `kwargs` itself would make them fail with a duplicate keyword TypeError.
        gym_kwargs = {**kwargs, "seed": seed} if env_id.startswith("dmc") else kwargs

        # Gym
        env = gym.make(env_id, **gym_kwargs)
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
    except gym.error.Error:

        # MetaWorld env
        import metaworld
        if env_id in metaworld.ML1.ENV_NAMES:
            env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
            # setting this avoids generating the same initialization after each reset
            env._freeze_rand_vec = False
            # Manually set spec, as metaworld environments are not registered via gym
            env.unwrapped.spec = EnvSpec(env_id)
            # Set Timelimit based on the maximum allowed path length of the environment
            env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
            env.seed(seed)
            env.action_space.seed(seed)
            env.observation_space.seed(seed)
            env.goal_space.seed(seed)

        else:
            # DMC
            from alr_envs import make_dmc
            env = make_dmc(env_id, seed=seed, **kwargs)

            assert env.base_step_limit == env.spec.max_episode_steps, \
                f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
                f"the DMC environment specification of {env.base_step_limit} steps."

    return env
|
2021-04-21 10:45:34 +02:00
|
|
|
|
|
|
|
|
2022-05-03 19:51:54 +02:00
|
|
|
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
                      ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
    """
    Helper function for creating a wrapped gym environment using MPs.
    It creates the base environment via `make`, applies all provided wrappers in the given order and verifies
    that at least one of them is an EpisodicWrapper, which exposes the interface for MPs.

    Args:
        env_id: name of the environment
        wrappers: wrapper classes to apply (at least one EpisodicWrapper subclass)
        mp: movement primitive instance handed to every EpisodicWrapper
        controller: controller instance handed to every EpisodicWrapper
        ep_wrapper_kwargs: additional keyword arguments for the constructor of every EpisodicWrapper
        seed: seed of environment
        **kwargs: forwarded to `make` for constructing the base environment

    Returns: gym environment with all specified wrappers applied

    Raises:
        AssertionError: if `wrappers` does not contain any EpisodicWrapper subclass.

    """
    _env = make(env_id, seed, **kwargs)
    has_episodic_wrapper = False
    for w in wrappers:
        if issubclass(w, EpisodicWrapper):
            # EpisodicWrappers need the MP and the controller, hence they use a specific constructor
            has_episodic_wrapper = True
            _env = w(env=_env, mp=mp, controller=controller, **ep_wrapper_kwargs)
        else:
            # all other wrappers, e.g. for vision, only receive the environment
            _env = w(_env)
    # The check is against EpisodicWrapper, so the message names that class.
    assert has_episodic_wrapper, \
        "At least one EpisodicWrapper is required in order to leverage motion primitive environments."
    return _env
|
|
|
|
|
|
|
|
def make_mp_from_kwargs(
        env_id: str, wrappers: Iterable, ep_wrapper_kwargs: MutableMapping, mp_kwargs: MutableMapping,
        controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
        sequenced=False, **kwargs
):
    """
    Build an MP wrapped gym environment from separate keyword argument dicts for each component.
    This can also be used standalone for manually building a custom MP environment.

    Missing values are derived from a temporary instance of the base environment:
        - ep_wrapper_kwargs['duration'] defaults to `spec.max_episode_steps * dt`
        - phase_kwargs['tau'] defaults to ep_wrapper_kwargs['duration']
        - mp_kwargs['action_dim'] defaults to the flattened size of the action space
    The defaults are written to copies; the mappings passed in by the caller are left untouched.

    Args:
        env_id: base_env_name,
        wrappers: list of wrappers (at least an EpisodicWrapper),
        ep_wrapper_kwargs: keyword arguments for the EpisodicWrapper constructor
        mp_kwargs: keyword arguments for `get_movement_primitive`
        controller_kwargs: keyword arguments for `get_controller`
        phase_kwargs: keyword arguments for `get_phase_generator`
        basis_kwargs: keyword arguments for `get_basis_generator`
        seed: seed of environment
        sequenced: When true, this allows to sequence multiple ProMPs by specifying the duration of each
            sub-trajectory, this behavior is much closer to step based learning.
            NOTE(review): this flag is not read anywhere in this function - confirm whether it is still needed.
        **kwargs: forwarded to the construction of the base environment

    Returns: MP wrapped gym env

    """
    _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))

    # Work on shallow copies, so that filling in defaults does not leak into the dicts of the caller.
    ep_wrapper_kwargs = dict(ep_wrapper_kwargs)
    phase_kwargs = dict(phase_kwargs)
    mp_kwargs = dict(mp_kwargs)

    # The dummy environment is only required for reading default values and is released right afterwards.
    dummy_env = make(env_id, seed)
    try:
        if ep_wrapper_kwargs.get('duration', None) is None:
            ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps * dummy_env.dt
        if phase_kwargs.get('tau', None) is None:
            phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
        mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
    finally:
        dummy_env.close()

    phase_gen = get_phase_generator(**phase_kwargs)
    basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
    controller = get_controller(**controller_kwargs)
    mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
                             ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
    return _env
|
|
|
|
|
|
|
|
|
2022-05-03 19:51:54 +02:00
|
|
|
def make_mp_env_helper(**kwargs):
    """
    Helper function for registering MP gym environments.
    It splits the flat keyword arguments into the individual component dicts and delegates to
    `make_mp_from_kwargs`. All remaining keyword arguments are passed on unchanged.

    Args:
        **kwargs: expects at least the following:
        {
        "name": base environment name.
        "wrappers": list of wrappers (at least an EpisodicWrapper is required),
        "movement_primitives_kwargs": {
            "movement_primitives_type": type_of_your_movement_primitive,
            non default arguments for the movement primitive instance
            ...
            }
        "ep_wrapper_kwargs": {
            arguments for the EpisodicWrapper instance
            ...
            },
        "controller_kwargs": {
            "controller_type": type_of_your_controller,
            non default arguments for the controller instance
            ...
            },
        "basis_generator_kwargs": {
            "basis_generator_type": type_of_your_basis_generator,
            non default arguments for the basis generator instance
            ...
            },
        "phase_generator_kwargs": {
            "phase_generator_type": type_of_your_phase_generator,
            non default arguments for the phase generator instance
            ...
            },
        }
        "seed" is optional and defaults to None.

    Returns: MP wrapped gym env

    Raises:
        KeyError: if one of the required entries listed above is missing.

    """
    seed = kwargs.pop("seed", None)
    wrapper_classes = kwargs.pop("wrappers")

    # One configuration dict per component of the MP environment
    movement_primitive_cfg = kwargs.pop("movement_primitives_kwargs")
    episodic_wrapper_cfg = kwargs.pop('ep_wrapper_kwargs')
    controller_cfg = kwargs.pop("controller_kwargs")
    phase_generator_cfg = kwargs.pop("phase_generator_kwargs")
    basis_generator_cfg = kwargs.pop("basis_generator_kwargs")
    base_env_id = kwargs.pop("name")

    # Everything that is still left in kwargs is forwarded as is.
    return make_mp_from_kwargs(
        env_id=base_env_id,
        wrappers=wrapper_classes,
        ep_wrapper_kwargs=episodic_wrapper_cfg,
        mp_kwargs=movement_primitive_cfg,
        controller_kwargs=controller_cfg,
        phase_kwargs=phase_generator_cfg,
        basis_kwargs=basis_generator_cfg,
        seed=seed,
        **kwargs,
    )
|
|
|
|
|
2021-06-30 15:00:36 +02:00
|
|
|
def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs=None, **kwargs):
    """
    This can also be used standalone for manually building a custom DMP environment.

    The base environment is created via `make`, all wrappers are applied in the given order and the result
    is wrapped in a `DmpWrapper`.

    Args:
        env_id: base_env_name,
        wrappers: list of wrappers (at least an MPEnvWrapper),
        seed: seed of environment
        mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
        **kwargs: forwarded to `make` for constructing the base environment

    Returns: DMP wrapped gym env

    Raises:
        AssertionError: if 'duration' and 'time_limit' disagree or 'num_dof' does not match the action space.

    """
    # Avoid a shared mutable default argument
    mp_kwargs = {} if mp_kwargs is None else mp_kwargs

    _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))

    # `_make_wrapped_env` requires `mp`, `controller` and `ep_wrapper_kwargs`, which do not exist for this
    # wrapper based interface, so calling it from here always failed with a TypeError.
    # Hence, the environment is created and wrapped directly.
    # NOTE(review): the presence of an MPEnvWrapper is not verified here - confirm DmpWrapper checks this.
    _env = make(env_id, seed, **kwargs)
    for wrapper in wrappers:
        _env = wrapper(_env)

    _verify_dof(_env, mp_kwargs.get("num_dof"))

    return DmpWrapper(_env, **mp_kwargs)
|
|
|
|
|
|
|
|
|
2021-11-15 09:10:03 +01:00
|
|
|
def make_promp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs=None, **kwargs):
    """
    This can also be used standalone for manually building a custom ProMP environment.

    The base environment is created via `make`, all wrappers are applied in the given order and the result
    is wrapped in a `ProMPWrapper`.

    Args:
        env_id: base_env_name,
        wrappers: list of wrappers (at least an MPEnvWrapper),
        seed: seed of environment
        mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int}
        **kwargs: forwarded to `make` for constructing the base environment

    Returns: ProMP wrapped gym env

    Raises:
        AssertionError: if 'duration' and 'time_limit' disagree or 'num_dof' does not match the action space.

    """
    # Avoid a shared mutable default argument
    mp_kwargs = {} if mp_kwargs is None else mp_kwargs

    _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))

    # `_make_wrapped_env` requires `mp`, `controller` and `ep_wrapper_kwargs`, which do not exist for this
    # wrapper based interface, so calling it from here always failed with a TypeError.
    # Hence, the environment is created and wrapped directly.
    # NOTE(review): the presence of an MPEnvWrapper is not verified here - confirm ProMPWrapper checks this.
    _env = make(env_id, seed, **kwargs)
    for wrapper in wrappers:
        _env = wrapper(_env)

    _verify_dof(_env, mp_kwargs.get("num_dof"))

    return ProMPWrapper(_env, **mp_kwargs)
|
|
|
|
|
|
|
|
|
2021-06-25 15:51:06 +02:00
|
|
|
def make_dmp_env_helper(**kwargs):
    """
    Helper function for registering DMP gym environments.
    It unpacks the flat keyword arguments and delegates to `make_dmp_env`; all entries that are not listed
    below are passed on unchanged.

    Args:
        **kwargs: expects at least the following:
        {
        "name": base_env_name,
        "wrappers": list of wrappers (at least an MPEnvWrapper),
        "mp_kwargs": dict of at least {num_dof: int, num_basis: int} for DMP
        }
        "seed" is optional and defaults to None.

    Returns: DMP wrapped gym env

    """
    seed = kwargs.pop("seed", None)
    return make_dmp_env(
        env_id=kwargs.pop("name"),
        wrappers=kwargs.pop("wrappers"),
        seed=seed,
        mp_kwargs=kwargs.pop("mp_kwargs"),
        **kwargs,
    )
|
2021-06-25 15:51:06 +02:00
|
|
|
|
|
|
|
|
2021-11-15 09:10:03 +01:00
|
|
|
def make_promp_env_helper(**kwargs):
    """
    Helper function for registering ProMP gym environments.
    This can also be used standalone for manually building a custom ProMP environment.
    It unpacks the flat keyword arguments and delegates to `make_promp_env`; all entries that are not listed
    below are passed on unchanged.

    Args:
        **kwargs: expects at least the following:
        {
        "name": base_env_name,
        "wrappers": list of wrappers (at least an MPEnvWrapper),
        "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int}
        }
        "seed" is optional and defaults to None.

    Returns: ProMP wrapped gym env

    """
    seed = kwargs.pop("seed", None)
    return make_promp_env(
        env_id=kwargs.pop("name"),
        wrappers=kwargs.pop("wrappers"),
        seed=seed,
        mp_kwargs=kwargs.pop("mp_kwargs"),
        **kwargs,
    )
|
|
|
|
|
|
|
|
|
2021-08-25 17:16:20 +02:00
|
|
|
def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]):
|
2021-07-19 14:05:25 +02:00
|
|
|
"""
|
|
|
|
When using DMC check if a manually specified time limit matches the trajectory duration the MP receives.
|
|
|
|
Mostly, the time_limit for DMC is not specified and the default values from DMC are taken.
|
|
|
|
This check, however, can only been done after instantiating the environment.
|
|
|
|
It can be found in the BaseMP class.
|
|
|
|
|
|
|
|
Args:
|
|
|
|
mp_time_limit: max trajectory length of mp in seconds
|
|
|
|
env_time_limit: max trajectory length of DMC environment in seconds
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
|
|
"""
|
|
|
|
if mp_time_limit is not None and env_time_limit is not None:
|
|
|
|
assert mp_time_limit == env_time_limit, \
|
2021-07-30 11:59:02 +02:00
|
|
|
f"The specified 'time_limit' of {env_time_limit}s does not match " \
|
2021-07-19 14:05:25 +02:00
|
|
|
f"the duration of {mp_time_limit}s for the MP."
|
2021-07-30 11:59:02 +02:00
|
|
|
|
|
|
|
|
2021-08-25 17:16:20 +02:00
|
|
|
def _verify_dof(base_env: gym.Env, dof: int):
|
2021-07-30 11:59:02 +02:00
|
|
|
action_shape = np.prod(base_env.action_space.shape)
|
|
|
|
assert dof == action_shape, \
|
|
|
|
f"The specified degrees of freedom ('num_dof') {dof} do not match " \
|
|
|
|
f"the action space of {action_shape} the base environments"
|