integrated metaworld tasks into the framework
This commit is contained in:
parent
a11965827d
commit
9b1ccb3235
@ -1,12 +1,12 @@
|
|||||||
from gym.envs.registration import register
|
from gym.envs.registration import register
|
||||||
from gym.wrappers import FlattenObservation
|
from gym.wrappers import FlattenObservation
|
||||||
|
|
||||||
from alr_envs import classic_control, dmc, open_ai
|
from alr_envs import classic_control, dmc, open_ai, meta
|
||||||
|
|
||||||
from alr_envs.utils.make_env_helpers import make_dmp_env
|
from alr_envs.utils.make_env_helpers import make_dmp_env
|
||||||
from alr_envs.utils.make_env_helpers import make_detpmp_env
|
from alr_envs.utils.make_env_helpers import make_detpmp_env
|
||||||
from alr_envs.utils.make_env_helpers import make_env
|
from alr_envs.utils.make_env_helpers import make
|
||||||
from alr_envs.utils.make_env_helpers import make_env_rank
|
from alr_envs.utils.make_env_helpers import make_rank
|
||||||
|
|
||||||
# Mujoco
|
# Mujoco
|
||||||
|
|
||||||
@ -305,13 +305,13 @@ register(
|
|||||||
# max_episode_steps=1,
|
# max_episode_steps=1,
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"ball_in_cup-catch",
|
"name": f"ball_in_cup-catch",
|
||||||
"time_limit": 1,
|
"time_limit": 2,
|
||||||
"episode_length": 50,
|
"episode_length": 100,
|
||||||
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
|
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
|
||||||
"mp_kwargs": {
|
"mp_kwargs": {
|
||||||
"num_dof": 2,
|
"num_dof": 2,
|
||||||
"num_basis": 5,
|
"num_basis": 5,
|
||||||
"duration": 1,
|
"duration": 2,
|
||||||
"learn_goal": True,
|
"learn_goal": True,
|
||||||
"alpha_phase": 2,
|
"alpha_phase": 2,
|
||||||
"bandwidth_factor": 2,
|
"bandwidth_factor": 2,
|
||||||
@ -331,16 +331,16 @@ register(
|
|||||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
kwargs={
|
kwargs={
|
||||||
"name": f"ball_in_cup-catch",
|
"name": f"ball_in_cup-catch",
|
||||||
"time_limit": 1,
|
"time_limit": 2,
|
||||||
"episode_length": 50,
|
"episode_length": 100,
|
||||||
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
|
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
|
||||||
"mp_kwargs": {
|
"mp_kwargs": {
|
||||||
"num_dof": 2,
|
"num_dof": 2,
|
||||||
"num_basis": 5,
|
"num_basis": 5,
|
||||||
"duration": 1,
|
"duration": 2,
|
||||||
"width": 0.025,
|
"width": 0.025,
|
||||||
"policy_type": "motor",
|
"policy_type": "motor",
|
||||||
"weights_scale": 0.2,
|
"weights_scale": 1,
|
||||||
"zero_start": True,
|
"zero_start": True,
|
||||||
"policy_kwargs": {
|
"policy_kwargs": {
|
||||||
"p_gains": 50,
|
"p_gains": 50,
|
||||||
@ -875,6 +875,23 @@ register(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='FetchSlideDetPMP-v1',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "gym.envs.robotics:FetchSlide-v1",
|
||||||
|
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 4,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.02,
|
||||||
|
"policy_type": "position"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
register(
|
register(
|
||||||
id='FetchReachDenseDetPMP-v1',
|
id='FetchReachDenseDetPMP-v1',
|
||||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
@ -891,3 +908,38 @@ register(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='FetchReachDetPMP-v1',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "gym.envs.robotics:FetchReach-v1",
|
||||||
|
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 4,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.02,
|
||||||
|
"policy_type": "position"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ButtonPressDetPMP-v2',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "button-press-v2",
|
||||||
|
"wrappers": [meta.button_press.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 4,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 6.25,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.025,
|
||||||
|
"policy_type": "position"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
3
alr_envs/dmc/README.MD
Normal file
3
alr_envs/dmc/README.MD
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# DeepMind Control (DMC) Wrappers
|
||||||
|
|
||||||
|
These are the Environment Wrappers for selected [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments in order to use our Motion Primitive gym interface with them.
|
@ -17,7 +17,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
|
|||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
env = alr_envs.make_env(env_id, seed)
|
env = alr_envs.make(env_id, seed)
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
print("observation shape:", env.observation_space.shape)
|
print("observation shape:", env.observation_space.shape)
|
||||||
|
@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
env = alr_envs.make_env(env_id, seed)
|
env = alr_envs.make(env_id, seed)
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
print("Observation shape: ", env.observation_space.shape)
|
print("Observation shape: ", env.observation_space.shape)
|
||||||
@ -56,7 +56,7 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
|
|||||||
Returns: Tuple of (obs, reward, done, info) with type np.ndarray
|
Returns: Tuple of (obs, reward, done, info) with type np.ndarray
|
||||||
|
|
||||||
"""
|
"""
|
||||||
env = gym.vector.AsyncVectorEnv([alr_envs.make_env_rank(env_id, seed, i) for i in range(n_cpu)])
|
env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)])
|
||||||
# OR
|
# OR
|
||||||
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
|
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
from alr_envs import MPWrapper
|
import alr_envs
|
||||||
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
|
|
||||||
|
|
||||||
|
|
||||||
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
|
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
|
||||||
@ -16,7 +15,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
|
|||||||
"""
|
"""
|
||||||
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
||||||
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
|
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
|
||||||
env = make_env(env_name, seed)
|
env = alr_envs.make(env_name, seed)
|
||||||
|
|
||||||
rewards = 0
|
rewards = 0
|
||||||
# env.render(mode=None)
|
# env.render(mode=None)
|
||||||
@ -71,7 +70,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
|
|||||||
"weights_scale": 50,
|
"weights_scale": 50,
|
||||||
"goal_scale": 0.1
|
"goal_scale": 0.1
|
||||||
}
|
}
|
||||||
env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
|
env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
|
||||||
|
|
||||||
# This time rendering every trajectory
|
# This time rendering every trajectory
|
||||||
if render:
|
if render:
|
||||||
@ -113,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
|||||||
|
|
||||||
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
|
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
|
||||||
# You can also add other gym.Wrappers in case they are needed.
|
# You can also add other gym.Wrappers in case they are needed.
|
||||||
wrappers = [MPWrapper]
|
wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
|
||||||
mp_kwargs = {
|
mp_kwargs = {
|
||||||
"num_dof": 5,
|
"num_dof": 5,
|
||||||
"num_basis": 5,
|
"num_basis": 5,
|
||||||
@ -125,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
|||||||
"weights_scale": 50,
|
"weights_scale": 50,
|
||||||
"goal_scale": 0.1
|
"goal_scale": 0.1
|
||||||
}
|
}
|
||||||
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
||||||
# OR for a deterministic ProMP:
|
# OR for a deterministic ProMP:
|
||||||
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from alr_envs.utils.make_env_helpers import make_env
|
import alr_envs
|
||||||
|
|
||||||
|
|
||||||
def example_mp(env_name, seed=1):
|
def example_mp(env_name, seed=1):
|
||||||
@ -13,7 +13,7 @@ def example_mp(env_name, seed=1):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
||||||
env = make_env(env_name, seed)
|
env = alr_envs.make(env_name, seed)
|
||||||
|
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
@ -29,13 +29,13 @@ def example_mp(env_name, seed=1):
|
|||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# DMP - not supported yet
|
# DMP - not supported yet
|
||||||
#example_mp("ReacherDetPMP-v2")
|
# example_mp("ReacherDMP-v2")
|
||||||
|
|
||||||
# DetProMP
|
# DetProMP
|
||||||
example_mp("ContinuousMountainCarDetPMP-v0")
|
example_mp("ContinuousMountainCarDetPMP-v0")
|
||||||
example_mp("ReacherDetPMP-v2")
|
example_mp("ReacherDetPMP-v2")
|
||||||
example_mp("FetchReachDenseDetPMP-v1")
|
example_mp("FetchReachDenseDetPMP-v1")
|
||||||
example_mp("FetchSlideDenseDetPMP-v1")
|
example_mp("FetchSlideDenseDetPMP-v1")
|
||||||
|
|
||||||
|
26
alr_envs/meta/README.MD
Normal file
26
alr_envs/meta/README.MD
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# MetaWorld Wrappers
|
||||||
|
|
||||||
|
These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Motion Primitive gym interface with them.
|
||||||
|
All Metaworld environments have a 39-dimensional observation space with the same structure. The tasks differ only in their objective and in the randomized initial observations.
|
||||||
|
Unused observations are zeroed out. E.g., for `Button-Press-v2` the observation mask looks as follows:
|
||||||
|
```python
|
||||||
|
return np.hstack([
|
||||||
|
# Current observation
|
||||||
|
[False] * 3, # end-effector position
|
||||||
|
[False] * 1, # normalized gripper open distance
|
||||||
|
[True] * 3, # main object position
|
||||||
|
[False] * 4, # main object quaternion
|
||||||
|
[False] * 3, # secondary object position
|
||||||
|
[False] * 4, # secondary object quaternion
|
||||||
|
# Previous observation
|
||||||
|
[False] * 3, # previous end-effector position
|
||||||
|
[False] * 1, # previous normalized gripper open distance
|
||||||
|
[False] * 3, # previous main object position
|
||||||
|
[False] * 4, # previous main object quaternion
|
||||||
|
[False] * 3, # previous second object position
|
||||||
|
[False] * 4, # previous second object quaternion
|
||||||
|
# Goal
|
||||||
|
[True] * 3, # goal position
|
||||||
|
])
|
||||||
|
```
|
||||||
|
For other tasks only the boolean values have to be adjusted accordingly.
|
1
alr_envs/meta/__init__.py
Normal file
1
alr_envs/meta/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from alr_envs.meta import button_press
|
48
alr_envs/meta/button_press.py
Normal file
48
alr_envs/meta/button_press.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
from typing import Tuple, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from mp_env_api import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
|
||||||
|
@property
|
||||||
|
def active_obs(self):
|
||||||
|
# This structure is the same for all metaworld environments.
|
||||||
|
# Only the observations which change could differ
|
||||||
|
return np.hstack([
|
||||||
|
# Current observation
|
||||||
|
[False] * 3, # end-effector position
|
||||||
|
[False] * 1, # normalized gripper open distance
|
||||||
|
[True] * 3, # main object position
|
||||||
|
[False] * 4, # main object quaternion
|
||||||
|
[False] * 3, # secondary object position
|
||||||
|
[False] * 4, # secondary object quaternion
|
||||||
|
# Previous observation
|
||||||
|
# TODO: Include previous values? According to their source they might be wrong for the first iteration.
|
||||||
|
[False] * 3, # previous end-effector position
|
||||||
|
[False] * 1, # previous normalized gripper open distance
|
||||||
|
[False] * 3, # previous main object position
|
||||||
|
[False] * 4, # previous main object quaternion
|
||||||
|
[False] * 3, # previous second object position
|
||||||
|
[False] * 4, # previous second object quaternion
|
||||||
|
# Goal
|
||||||
|
[True] * 3, # goal position
|
||||||
|
])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.env.physics.named.data.qpos[:]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
return self.env.physics.named.data.qvel[:]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
3
alr_envs/open_ai/README.MD
Normal file
3
alr_envs/open_ai/README.MD
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# OpenAI Gym Wrappers
|
||||||
|
|
||||||
|
These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments in order to use our Motion Primitive gym interface with them.
|
@ -4,8 +4,10 @@ from typing import Union
|
|||||||
import gym
|
import gym
|
||||||
from gym.envs.registration import register
|
from gym.envs.registration import register
|
||||||
|
|
||||||
|
from alr_envs.utils.make_env_helpers import make
|
||||||
|
|
||||||
def make(
|
|
||||||
|
def make_dmc(
|
||||||
id: str,
|
id: str,
|
||||||
seed: int = 1,
|
seed: int = 1,
|
||||||
visualize_reward: bool = True,
|
visualize_reward: bool = True,
|
||||||
|
@ -3,21 +3,22 @@ from typing import Iterable, List, Type, Union
|
|||||||
|
|
||||||
import gym
|
import gym
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from gym.envs.registration import EnvSpec
|
||||||
|
|
||||||
from mp_env_api import MPEnvWrapper
|
from mp_env_api import MPEnvWrapper
|
||||||
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
|
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
|
||||||
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
|
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
|
||||||
|
|
||||||
|
|
||||||
def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
|
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
|
||||||
"""
|
"""
|
||||||
TODO: Do we need this?
|
TODO: Do we need this?
|
||||||
Generate a callable to create a new gym environment with a given seed.
|
Generate a callable to create a new gym environment with a given seed.
|
||||||
The rank is added to the seed and can be used for example when using vector environments.
|
The rank is added to the seed and can be used for example when using vector environments.
|
||||||
E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
|
E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
|
||||||
with seeds 123 through 130.
|
with seeds 123 through 130.
|
||||||
Hence, testing environments should be seeded with a value which is offset by the number of training environments.
|
Hence, testing environments should be seeded with a value which is offset by the number of training environments.
|
||||||
Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns
|
Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
env_id: name of the environment
|
env_id: name of the environment
|
||||||
@ -30,12 +31,12 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, *
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def f():
|
def f():
|
||||||
return make_env(env_id, seed + rank, **kwargs)
|
return make(env_id, seed + rank, **kwargs)
|
||||||
|
|
||||||
return f if return_callable else f()
|
return f if return_callable else f()
|
||||||
|
|
||||||
|
|
||||||
def make_env(env_id: str, seed, **kwargs):
|
def make(env_id: str, seed, **kwargs):
|
||||||
"""
|
"""
|
||||||
Converts an env_id to an environment with the gym API.
|
Converts an env_id to an environment with the gym API.
|
||||||
This also works for DeepMind Control Suite interface_wrappers
|
This also works for DeepMind Control Suite interface_wrappers
|
||||||
@ -58,13 +59,30 @@ def make_env(env_id: str, seed, **kwargs):
|
|||||||
env.action_space.seed(seed)
|
env.action_space.seed(seed)
|
||||||
env.observation_space.seed(seed)
|
env.observation_space.seed(seed)
|
||||||
except gym.error.Error:
|
except gym.error.Error:
|
||||||
# DMC
|
|
||||||
from alr_envs.utils import make
|
|
||||||
env = make(env_id, seed=seed, **kwargs)
|
|
||||||
|
|
||||||
assert env.base_step_limit == env.spec.max_episode_steps, \
|
# MetaWorld env
|
||||||
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
|
import metaworld
|
||||||
f"the DMC environment specification of {env.base_step_limit} steps."
|
if env_id in metaworld.ML1.ENV_NAMES:
|
||||||
|
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
|
||||||
|
# setting this avoids generating the same initialization after each reset
|
||||||
|
env._freeze_rand_vec = False
|
||||||
|
# Manually set spec, as metaworld environments are not registered via gym
|
||||||
|
env.unwrapped.spec = EnvSpec(env_id)
|
||||||
|
# Set Timelimit based on the maximum allowed path length of the environment
|
||||||
|
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
|
||||||
|
env.seed(seed)
|
||||||
|
env.action_space.seed(seed)
|
||||||
|
env.observation_space.seed(seed)
|
||||||
|
env.goal_space.seed(seed)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# DMC
|
||||||
|
from alr_envs.utils import make_dmc
|
||||||
|
env = make_dmc(env_id, seed=seed, **kwargs)
|
||||||
|
|
||||||
|
assert env.base_step_limit == env.spec.max_episode_steps, \
|
||||||
|
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
|
||||||
|
f"the DMC environment specification of {env.base_step_limit} steps."
|
||||||
|
|
||||||
return env
|
return env
|
||||||
|
|
||||||
@ -84,7 +102,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
# _env = gym.make(env_id)
|
# _env = gym.make(env_id)
|
||||||
_env = make_env(env_id, seed, **kwargs)
|
_env = make(env_id, seed, **kwargs)
|
||||||
|
|
||||||
assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
|
assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
|
||||||
"At least one MPEnvWrapper is required in order to leverage motion primitive environments."
|
"At least one MPEnvWrapper is required in order to leverage motion primitive environments."
|
||||||
@ -175,7 +193,7 @@ def make_detpmp_env_helper(**kwargs):
|
|||||||
|
|
||||||
|
|
||||||
def make_contextual_env(env_id, context, seed, rank):
|
def make_contextual_env(env_id, context, seed, rank):
|
||||||
env = make_env(env_id, seed + rank, context=context)
|
env = make(env_id, seed + rank, context=context)
|
||||||
# env = gym.make(env_id, context=context)
|
# env = gym.make(env_id, context=context)
|
||||||
# env.seed(seed + rank)
|
# env.seed(seed + rank)
|
||||||
return lambda: env
|
return lambda: env
|
||||||
|
@ -3,7 +3,7 @@ from gym.vector.async_vector_env import AsyncVectorEnv
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from _collections import defaultdict
|
from _collections import defaultdict
|
||||||
|
|
||||||
from alr_envs.utils.make_env_helpers import make_env_rank
|
from alr_envs.utils.make_env_helpers import make_rank
|
||||||
|
|
||||||
|
|
||||||
def split_array(ary, size):
|
def split_array(ary, size):
|
||||||
@ -54,7 +54,7 @@ class AlrMpEnvSampler:
|
|||||||
|
|
||||||
def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
|
def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
|
||||||
self.num_envs = num_envs
|
self.num_envs = num_envs
|
||||||
self.env = AsyncVectorEnv([make_env_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
|
self.env = AsyncVectorEnv([make_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
|
||||||
|
|
||||||
def __call__(self, params):
|
def __call__(self, params):
|
||||||
params = np.atleast_2d(params)
|
params = np.atleast_2d(params)
|
||||||
|
1
setup.py
1
setup.py
@ -12,6 +12,7 @@ setup(
|
|||||||
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
||||||
'mujoco-py<2.1,>=2.0',
|
'mujoco-py<2.1,>=2.0',
|
||||||
'dm_control'
|
'dm_control'
|
||||||
|
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld'
|
||||||
],
|
],
|
||||||
|
|
||||||
url='https://github.com/ALRhub/alr_envs/',
|
url='https://github.com/ALRhub/alr_envs/',
|
||||||
|
127
test/test_dmc_envs.py
Normal file
127
test/test_dmc_envs.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
import unittest
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from dm_control import suite, manipulation
|
||||||
|
|
||||||
|
from alr_envs import make
|
||||||
|
|
||||||
|
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
|
||||||
|
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
|
||||||
|
SEED = 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnvironments(unittest.TestCase):
|
||||||
|
|
||||||
|
def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
|
||||||
|
"""
|
||||||
|
Example for running a DMC based env in the step based setting.
|
||||||
|
The env_id has to be specified as `domain_name-task_name` or
|
||||||
|
for manipulation tasks as `manipulation-environment_name`
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env_id: Either `domain_name-task_name` or `manipulation-environment_name`
|
||||||
|
iterations: Number of rollout steps to run
|
||||||
|
seed= random seeding
|
||||||
|
render: Render the episode
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
env: gym.Env = make(env_id, seed=seed)
|
||||||
|
rewards = []
|
||||||
|
observations = []
|
||||||
|
dones = []
|
||||||
|
obs = env.reset()
|
||||||
|
self._verify_observations(obs, env.observation_space, "reset()")
|
||||||
|
|
||||||
|
length = env.spec.max_episode_steps
|
||||||
|
if iterations is None:
|
||||||
|
if length is None:
|
||||||
|
iterations = 1
|
||||||
|
else:
|
||||||
|
iterations = length
|
||||||
|
|
||||||
|
# number of samples(multiple environment steps)
|
||||||
|
for i in range(iterations):
|
||||||
|
observations.append(obs)
|
||||||
|
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
|
||||||
|
obs, reward, done, info = env.step(ac)
|
||||||
|
|
||||||
|
self._verify_observations(obs, env.observation_space, "step()")
|
||||||
|
self._verify_reward(reward)
|
||||||
|
self._verify_done(done)
|
||||||
|
|
||||||
|
rewards.append(reward)
|
||||||
|
dones.append(done)
|
||||||
|
|
||||||
|
if render:
|
||||||
|
env.render("human")
|
||||||
|
|
||||||
|
if done:
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
assert done, "Done flag is not True after max episode length."
|
||||||
|
observations.append(obs)
|
||||||
|
env.close()
|
||||||
|
del env
|
||||||
|
return np.array(observations), np.array(rewards), np.array(dones)
|
||||||
|
|
||||||
|
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
|
||||||
|
self.assertTrue(observation_space.contains(obs),
|
||||||
|
f"Observation {obs} received from {obs_type} "
|
||||||
|
f"not contained in observation space {observation_space}.")
|
||||||
|
|
||||||
|
def _verify_reward(self, reward):
|
||||||
|
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
|
||||||
|
|
||||||
|
def _verify_done(self, done):
|
||||||
|
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
|
||||||
|
|
||||||
|
def test_dmc_functionality(self):
|
||||||
|
"""Tests that environments runs without errors using random actions."""
|
||||||
|
for env_id in DMC_ENVS:
|
||||||
|
with self.subTest(msg=env_id):
|
||||||
|
self._run_env(env_id)
|
||||||
|
|
||||||
|
def test_dmc_determinism(self):
|
||||||
|
"""Tests that identical seeds produce identical trajectories."""
|
||||||
|
seed = 0
|
||||||
|
# Iterate over two trajectories, which should have the same state and action sequence
|
||||||
|
for env_id in DMC_ENVS:
|
||||||
|
with self.subTest(msg=env_id):
|
||||||
|
traj1 = self._run_env(env_id, seed=seed)
|
||||||
|
traj2 = self._run_env(env_id, seed=seed)
|
||||||
|
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||||
|
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
|
||||||
|
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
|
||||||
|
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||||
|
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||||
|
|
||||||
|
def test_manipulation_functionality(self):
|
||||||
|
"""Tests that environments runs without errors using random actions."""
|
||||||
|
for env_id in MANIPULATION_SPECS:
|
||||||
|
with self.subTest(msg=env_id):
|
||||||
|
self._run_env(env_id)
|
||||||
|
|
||||||
|
def test_manipulation_determinism(self):
|
||||||
|
"""Tests that identical seeds produce identical trajectories."""
|
||||||
|
seed = 0
|
||||||
|
# Iterate over two trajectories, which should have the same state and action sequence
|
||||||
|
for env_id in MANIPULATION_SPECS:
|
||||||
|
with self.subTest(msg=env_id):
|
||||||
|
traj1 = self._run_env(env_id, seed=seed)
|
||||||
|
traj2 = self._run_env(env_id, seed=seed)
|
||||||
|
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||||
|
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
|
||||||
|
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
|
||||||
|
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||||
|
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||||
|
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -4,7 +4,7 @@ import gym
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import alr_envs # noqa
|
import alr_envs # noqa
|
||||||
from alr_envs.utils.make_env_helpers import make_env
|
from alr_envs.utils.make_env_helpers import make
|
||||||
|
|
||||||
ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
|
ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
|
||||||
SEED = 1
|
SEED = 1
|
||||||
@ -27,7 +27,7 @@ class TestEnvironments(unittest.TestCase):
|
|||||||
Returns:
|
Returns:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
env: gym.Env = make_env(env_id, seed=seed)
|
env: gym.Env = make(env_id, seed=seed)
|
||||||
rewards = []
|
rewards = []
|
||||||
observations = []
|
observations = []
|
||||||
dones = []
|
dones = []
|
||||||
@ -62,6 +62,7 @@ class TestEnvironments(unittest.TestCase):
|
|||||||
if done:
|
if done:
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
|
assert done, "Done flag is not True after max episode length."
|
||||||
observations.append(obs)
|
observations.append(obs)
|
||||||
env.close()
|
env.close()
|
||||||
del env
|
del env
|
||||||
@ -81,7 +82,6 @@ class TestEnvironments(unittest.TestCase):
|
|||||||
def test_environment_functionality(self):
|
def test_environment_functionality(self):
|
||||||
"""Tests that environments runs without errors using random actions."""
|
"""Tests that environments runs without errors using random actions."""
|
||||||
for spec in ALL_SPECS:
|
for spec in ALL_SPECS:
|
||||||
# try:
|
|
||||||
with self.subTest(msg=spec.id):
|
with self.subTest(msg=spec.id):
|
||||||
self._run_env(spec.id)
|
self._run_env(spec.id)
|
||||||
|
|
||||||
@ -91,7 +91,6 @@ class TestEnvironments(unittest.TestCase):
|
|||||||
# Iterate over two trajectories, which should have the same state and action sequence
|
# Iterate over two trajectories, which should have the same state and action sequence
|
||||||
for spec in ALL_SPECS:
|
for spec in ALL_SPECS:
|
||||||
with self.subTest(msg=spec.id):
|
with self.subTest(msg=spec.id):
|
||||||
self._run_env(spec.id)
|
|
||||||
traj1 = self._run_env(spec.id, seed=seed)
|
traj1 = self._run_env(spec.id, seed=seed)
|
||||||
traj2 = self._run_env(spec.id, seed=seed)
|
traj2 = self._run_env(spec.id, seed=seed)
|
||||||
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||||
|
107
test/test_metaworld_envs.py
Normal file
107
test/test_metaworld_envs.py
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
import unittest
|
||||||
|
|
||||||
|
import gym
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from alr_envs import make
|
||||||
|
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
|
||||||
|
|
||||||
|
ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
|
||||||
|
SEED = 1
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnvironments(unittest.TestCase):
|
||||||
|
|
||||||
|
def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
|
||||||
|
"""
|
||||||
|
Example for running a Metaworld based env in the step based setting.
|
||||||
|
The env_id has to be specified as the Metaworld task name,
|
||||||
|
e.g. `button-press-v2` (without the `-goal-observable` suffix).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
env_id: Metaworld task name, e.g. `button-press-v2`
|
||||||
|
iterations: Number of rollout steps to run
|
||||||
|
seed= random seeding
|
||||||
|
render: Render the episode
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
env: gym.Env = make(env_id, seed=seed)
|
||||||
|
rewards = []
|
||||||
|
observations = []
|
||||||
|
actions = []
|
||||||
|
dones = []
|
||||||
|
obs = env.reset()
|
||||||
|
self._verify_observations(obs, env.observation_space, "reset()")
|
||||||
|
|
||||||
|
length = env.max_path_length
|
||||||
|
if iterations is None:
|
||||||
|
if length is None:
|
||||||
|
iterations = 1
|
||||||
|
else:
|
||||||
|
iterations = length
|
||||||
|
|
||||||
|
# number of samples(multiple environment steps)
|
||||||
|
for i in range(iterations):
|
||||||
|
observations.append(obs)
|
||||||
|
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
actions.append(ac)
|
||||||
|
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
|
||||||
|
obs, reward, done, info = env.step(ac)
|
||||||
|
|
||||||
|
self._verify_observations(obs, env.observation_space, "step()")
|
||||||
|
self._verify_reward(reward)
|
||||||
|
self._verify_done(done)
|
||||||
|
|
||||||
|
rewards.append(reward)
|
||||||
|
dones.append(done)
|
||||||
|
|
||||||
|
if render:
|
||||||
|
env.render("human")
|
||||||
|
|
||||||
|
if done:
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
assert done, "Done flag is not True after max episode length."
|
||||||
|
observations.append(obs)
|
||||||
|
env.close()
|
||||||
|
del env
|
||||||
|
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
|
||||||
|
|
||||||
|
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
|
||||||
|
self.assertTrue(observation_space.contains(obs),
|
||||||
|
f"Observation {obs} received from {obs_type} "
|
||||||
|
f"not contained in observation space {observation_space}.")
|
||||||
|
|
||||||
|
def _verify_reward(self, reward):
|
||||||
|
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
|
||||||
|
|
||||||
|
def _verify_done(self, done):
|
||||||
|
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
|
||||||
|
|
||||||
|
def test_dmc_functionality(self):
|
||||||
|
"""Tests that environments runs without errors using random actions."""
|
||||||
|
for env_id in ALL_ENVS:
|
||||||
|
with self.subTest(msg=env_id):
|
||||||
|
self._run_env(env_id)
|
||||||
|
|
||||||
|
def test_dmc_determinism(self):
|
||||||
|
"""Tests that identical seeds produce identical trajectories."""
|
||||||
|
seed = 0
|
||||||
|
# Iterate over two trajectories, which should have the same state and action sequence
|
||||||
|
for env_id in ALL_ENVS:
|
||||||
|
with self.subTest(msg=env_id):
|
||||||
|
traj1 = self._run_env(env_id, seed=seed)
|
||||||
|
traj2 = self._run_env(env_id, seed=seed)
|
||||||
|
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||||
|
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
|
||||||
|
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
|
||||||
|
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
|
||||||
|
self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
|
||||||
|
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
Loading…
Reference in New Issue
Block a user