integrated metaworld tasks into the framework
This commit is contained in:
parent
a11965827d
commit
9b1ccb3235
@ -1,12 +1,12 @@
|
||||
from gym.envs.registration import register
|
||||
from gym.wrappers import FlattenObservation
|
||||
|
||||
from alr_envs import classic_control, dmc, open_ai
|
||||
from alr_envs import classic_control, dmc, open_ai, meta
|
||||
|
||||
from alr_envs.utils.make_env_helpers import make_dmp_env
|
||||
from alr_envs.utils.make_env_helpers import make_detpmp_env
|
||||
from alr_envs.utils.make_env_helpers import make_env
|
||||
from alr_envs.utils.make_env_helpers import make_env_rank
|
||||
from alr_envs.utils.make_env_helpers import make
|
||||
from alr_envs.utils.make_env_helpers import make_rank
|
||||
|
||||
# Mujoco
|
||||
|
||||
@ -305,13 +305,13 @@ register(
|
||||
# max_episode_steps=1,
|
||||
kwargs={
|
||||
"name": f"ball_in_cup-catch",
|
||||
"time_limit": 1,
|
||||
"episode_length": 50,
|
||||
"time_limit": 2,
|
||||
"episode_length": 100,
|
||||
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 1,
|
||||
"duration": 2,
|
||||
"learn_goal": True,
|
||||
"alpha_phase": 2,
|
||||
"bandwidth_factor": 2,
|
||||
@ -331,16 +331,16 @@ register(
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||
kwargs={
|
||||
"name": f"ball_in_cup-catch",
|
||||
"time_limit": 1,
|
||||
"episode_length": 50,
|
||||
"time_limit": 2,
|
||||
"episode_length": 100,
|
||||
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 2,
|
||||
"num_basis": 5,
|
||||
"duration": 1,
|
||||
"duration": 2,
|
||||
"width": 0.025,
|
||||
"policy_type": "motor",
|
||||
"weights_scale": 0.2,
|
||||
"weights_scale": 1,
|
||||
"zero_start": True,
|
||||
"policy_kwargs": {
|
||||
"p_gains": 50,
|
||||
@ -875,6 +875,23 @@ register(
|
||||
}
|
||||
)
|
||||
|
||||
# Register the deterministic ProMP (DetPMP) version of the gym robotics FetchSlide-v1 task.
# The env is built through make_detpmp_env_helper with the wrappers and mp_kwargs given below.
# NOTE(review): FlattenObservation is presumably listed first because the Fetch envs return
# dict observations — confirm against the gym robotics envs.
register(
|
||||
id='FetchSlideDetPMP-v1',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||
kwargs={
|
||||
"name": "gym.envs.robotics:FetchSlide-v1",
|
||||
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 4,
|
||||
"num_basis": 5,
|
||||
"duration": 2,
|
||||
"post_traj_time": 0,
|
||||
"width": 0.02,
|
||||
"policy_type": "position"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='FetchReachDenseDetPMP-v1',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||
@ -891,3 +908,38 @@ register(
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# Register the deterministic ProMP (DetPMP) version of the gym robotics FetchReach-v1 task.
# The env is built through make_detpmp_env_helper with the wrappers and mp_kwargs given below.
# NOTE(review): FlattenObservation is presumably listed first because the Fetch envs return
# dict observations — confirm against the gym robotics envs.
register(
|
||||
id='FetchReachDetPMP-v1',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||
kwargs={
|
||||
"name": "gym.envs.robotics:FetchReach-v1",
|
||||
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 4,
|
||||
"num_basis": 5,
|
||||
"duration": 2,
|
||||
"post_traj_time": 0,
|
||||
"width": 0.02,
|
||||
"policy_type": "position"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# Register the deterministic ProMP (DetPMP) version of the MetaWorld button-press-v2 task.
# The env is built through make_detpmp_env_helper using the MetaWorld button press MPWrapper.
# NOTE(review): the duration of 6.25 presumably spans one full MetaWorld episode
# (max_path_length * dt) — TODO confirm against the environment.
register(
|
||||
id='ButtonPressDetPMP-v2',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||
kwargs={
|
||||
"name": "button-press-v2",
|
||||
"wrappers": [meta.button_press.MPWrapper],
|
||||
"mp_kwargs": {
|
||||
"num_dof": 4,
|
||||
"num_basis": 5,
|
||||
"duration": 6.25,
|
||||
"post_traj_time": 0,
|
||||
"width": 0.025,
|
||||
"policy_type": "position"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
|
3
alr_envs/dmc/README.MD
Normal file
3
alr_envs/dmc/README.MD
Normal file
@ -0,0 +1,3 @@
|
||||
# DeepMind Control (DMC) Wrappers
|
||||
|
||||
These are the Environment Wrappers for selected [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments in order to use our Motion Primitive gym interface with them.
|
@ -17,7 +17,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
env = alr_envs.make_env(env_id, seed)
|
||||
env = alr_envs.make(env_id, seed)
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
print("observation shape:", env.observation_space.shape)
|
||||
|
@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
|
||||
|
||||
"""
|
||||
|
||||
env = alr_envs.make_env(env_id, seed)
|
||||
env = alr_envs.make(env_id, seed)
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
print("Observation shape: ", env.observation_space.shape)
|
||||
@ -56,7 +56,7 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
|
||||
Returns: Tuple of (obs, reward, done, info) with type np.ndarray
|
||||
|
||||
"""
|
||||
env = gym.vector.AsyncVectorEnv([alr_envs.make_env_rank(env_id, seed, i) for i in range(n_cpu)])
|
||||
env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)])
|
||||
# OR
|
||||
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
from alr_envs import MPWrapper
|
||||
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
|
||||
import alr_envs
|
||||
|
||||
|
||||
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
|
||||
@ -16,7 +15,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
|
||||
"""
|
||||
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
||||
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
|
||||
env = make_env(env_name, seed)
|
||||
env = alr_envs.make(env_name, seed)
|
||||
|
||||
rewards = 0
|
||||
# env.render(mode=None)
|
||||
@ -71,7 +70,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1
|
||||
}
|
||||
env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
|
||||
env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
|
||||
|
||||
# This time rendering every trajectory
|
||||
if render:
|
||||
@ -113,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
|
||||
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
|
||||
# You can also add other gym.Wrappers in case they are needed.
|
||||
wrappers = [MPWrapper]
|
||||
wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
|
||||
mp_kwargs = {
|
||||
"num_dof": 5,
|
||||
"num_basis": 5,
|
||||
@ -125,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||
"weights_scale": 50,
|
||||
"goal_scale": 0.1
|
||||
}
|
||||
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
||||
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
||||
# OR for a deterministic ProMP:
|
||||
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
from alr_envs.utils.make_env_helpers import make_env
|
||||
import alr_envs
|
||||
|
||||
|
||||
def example_mp(env_name, seed=1):
|
||||
@ -13,7 +13,7 @@ def example_mp(env_name, seed=1):
|
||||
|
||||
"""
|
||||
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
||||
env = make_env(env_name, seed)
|
||||
env = alr_envs.make(env_name, seed)
|
||||
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
@ -29,13 +29,13 @@ def example_mp(env_name, seed=1):
|
||||
rewards = 0
|
||||
obs = env.reset()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# DMP - not supported yet
|
||||
#example_mp("ReacherDetPMP-v2")
|
||||
# example_mp("ReacherDMP-v2")
|
||||
|
||||
# DetProMP
|
||||
example_mp("ContinuousMountainCarDetPMP-v0")
|
||||
example_mp("ReacherDetPMP-v2")
|
||||
example_mp("FetchReachDenseDetPMP-v1")
|
||||
example_mp("FetchSlideDenseDetPMP-v1")
|
||||
|
||||
|
26
alr_envs/meta/README.MD
Normal file
26
alr_envs/meta/README.MD
Normal file
@ -0,0 +1,26 @@
|
||||
# MetaWorld Wrappers
|
||||
|
||||
These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Motion Primitive gym interface with them.
|
||||
All Metaworld environments share a 39-dimensional observation space with the same structure. The tasks differ only in their objective and in the randomized initial observations.
|
||||
Unused observations are zeroed out. For example, for `Button-Press-v2` the observation mask looks as follows:
|
||||
```python
|
||||
return np.hstack([
|
||||
# Current observation
|
||||
[False] * 3, # end-effector position
|
||||
[False] * 1, # normalized gripper open distance
|
||||
[True] * 3, # main object position
|
||||
[False] * 4, # main object quaternion
|
||||
[False] * 3, # secondary object position
|
||||
[False] * 4, # secondary object quaternion
|
||||
# Previous observation
|
||||
[False] * 3, # previous end-effector position
|
||||
[False] * 1, # previous normalized gripper open distance
|
||||
[False] * 3, # previous main object position
|
||||
[False] * 4, # previous main object quaternion
|
||||
[False] * 3, # previous second object position
|
||||
[False] * 4, # previous second object quaternion
|
||||
# Goal
|
||||
[True] * 3, # goal position
|
||||
])
|
||||
```
|
||||
For other tasks only the boolean values have to be adjusted accordingly.
|
1
alr_envs/meta/__init__.py
Normal file
1
alr_envs/meta/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from alr_envs.meta import button_press
|
48
alr_envs/meta/button_press.py
Normal file
48
alr_envs/meta/button_press.py
Normal file
@ -0,0 +1,48 @@
|
||||
from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mp_env_api import MPEnvWrapper
|
||||
|
||||
|
||||
class MPWrapper(MPEnvWrapper):
    """Motion primitive wrapper for the MetaWorld button press environment."""

    @property
    def active_obs(self):
        """Boolean mask over the flat observation marking the active entries."""
        # The layout is shared by all metaworld environments; only the set of
        # entries that actually change may differ between tasks.
        # Every segment is given as (number of entries, active flag).
        segments = (
            # Current observation
            (3, False),  # end-effector position
            (1, False),  # normalized gripper open distance
            (3, True),  # main object position
            (4, False),  # main object quaternion
            (3, False),  # secondary object position
            (4, False),  # secondary object quaternion
            # Previous observation
            # TODO: Include previous values? According to their source they might be wrong for the first iteration.
            (3, False),  # previous end-effector position
            (1, False),  # previous normalized gripper open distance
            (3, False),  # previous main object position
            (4, False),  # previous main object quaternion
            (3, False),  # previous second object position
            (4, False),  # previous second object quaternion
            # Goal
            (3, True),  # goal position
        )
        return np.hstack([[is_active] * size for size, is_active in segments])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """Copy of the `qpos` entries read from the wrapped env."""
        # NOTE(review): `physics.named.data` looks like the dm_control accessor;
        # confirm that the wrapped MetaWorld env actually exposes it.
        sim_data = self.env.physics.named.data
        return sim_data.qpos[:]

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        """Copy of the `qvel` entries read from the wrapped env."""
        # NOTE(review): same accessor concern as in `current_pos`.
        sim_data = self.env.physics.named.data
        return sim_data.qvel[:]

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        """Always raises: this wrapper does not provide a goal position.

        Raises:
            ValueError: on every access.
        """
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        """Time step as reported by the wrapped env's `dt` attribute."""
        return self.env.dt
|
3
alr_envs/open_ai/README.MD
Normal file
3
alr_envs/open_ai/README.MD
Normal file
@ -0,0 +1,3 @@
|
||||
# OpenAI Gym Wrappers
|
||||
|
||||
These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments in order to use our Motion Primitive gym interface with them.
|
@ -4,8 +4,10 @@ from typing import Union
|
||||
import gym
|
||||
from gym.envs.registration import register
|
||||
|
||||
from alr_envs.utils.make_env_helpers import make
|
||||
|
||||
def make(
|
||||
|
||||
def make_dmc(
|
||||
id: str,
|
||||
seed: int = 1,
|
||||
visualize_reward: bool = True,
|
||||
|
@ -3,21 +3,22 @@ from typing import Iterable, List, Type, Union
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
from gym.envs.registration import EnvSpec
|
||||
|
||||
from mp_env_api import MPEnvWrapper
|
||||
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
|
||||
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
|
||||
|
||||
|
||||
def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
|
||||
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
|
||||
"""
|
||||
TODO: Do we need this?
|
||||
Generate a callable to create a new gym environment with a given seed.
|
||||
The rank is added to the seed and can be used for example when using vector environments.
|
||||
E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
|
||||
E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
|
||||
with seeds 123 through 130.
|
||||
Hence, testing environments should be seeded with a value which is offset by the number of training environments.
|
||||
Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
|
||||
Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
|
||||
|
||||
Args:
|
||||
env_id: name of the environment
|
||||
@ -30,12 +31,12 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, *
|
||||
"""
|
||||
|
||||
def f():
|
||||
return make_env(env_id, seed + rank, **kwargs)
|
||||
return make(env_id, seed + rank, **kwargs)
|
||||
|
||||
return f if return_callable else f()
|
||||
|
||||
|
||||
def make_env(env_id: str, seed, **kwargs):
|
||||
def make(env_id: str, seed, **kwargs):
|
||||
"""
|
||||
Converts an env_id to an environment with the gym API.
|
||||
This also works for DeepMind Control Suite and MetaWorld environments
|
||||
@ -58,13 +59,30 @@ def make_env(env_id: str, seed, **kwargs):
|
||||
env.action_space.seed(seed)
|
||||
env.observation_space.seed(seed)
|
||||
except gym.error.Error:
|
||||
# DMC
|
||||
from alr_envs.utils import make
|
||||
env = make(env_id, seed=seed, **kwargs)
|
||||
|
||||
assert env.base_step_limit == env.spec.max_episode_steps, \
|
||||
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
|
||||
f"the DMC environment specification of {env.base_step_limit} steps."
|
||||
# MetaWorld env
|
||||
import metaworld
|
||||
if env_id in metaworld.ML1.ENV_NAMES:
|
||||
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
|
||||
# setting this avoids generating the same initialization after each reset
|
||||
env._freeze_rand_vec = False
|
||||
# Manually set spec, as metaworld environments are not registered via gym
|
||||
env.unwrapped.spec = EnvSpec(env_id)
|
||||
# Set Timelimit based on the maximum allowed path length of the environment
|
||||
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
|
||||
env.seed(seed)
|
||||
env.action_space.seed(seed)
|
||||
env.observation_space.seed(seed)
|
||||
env.goal_space.seed(seed)
|
||||
|
||||
else:
|
||||
# DMC
|
||||
from alr_envs.utils import make_dmc
|
||||
env = make_dmc(env_id, seed=seed, **kwargs)
|
||||
|
||||
assert env.base_step_limit == env.spec.max_episode_steps, \
|
||||
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
|
||||
f"the DMC environment specification of {env.base_step_limit} steps."
|
||||
|
||||
return env
|
||||
|
||||
@ -84,7 +102,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
|
||||
|
||||
"""
|
||||
# _env = gym.make(env_id)
|
||||
_env = make_env(env_id, seed, **kwargs)
|
||||
_env = make(env_id, seed, **kwargs)
|
||||
|
||||
assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
|
||||
"At least one MPEnvWrapper is required in order to leverage motion primitive environments."
|
||||
@ -175,7 +193,7 @@ def make_detpmp_env_helper(**kwargs):
|
||||
|
||||
|
||||
def make_contextual_env(env_id, context, seed, rank):
|
||||
env = make_env(env_id, seed + rank, context=context)
|
||||
env = make(env_id, seed + rank, context=context)
|
||||
# env = gym.make(env_id, context=context)
|
||||
# env.seed(seed + rank)
|
||||
return lambda: env
|
||||
|
@ -3,7 +3,7 @@ from gym.vector.async_vector_env import AsyncVectorEnv
|
||||
import numpy as np
|
||||
from _collections import defaultdict
|
||||
|
||||
from alr_envs.utils.make_env_helpers import make_env_rank
|
||||
from alr_envs.utils.make_env_helpers import make_rank
|
||||
|
||||
|
||||
def split_array(ary, size):
|
||||
@ -54,7 +54,7 @@ class AlrMpEnvSampler:
|
||||
|
||||
def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
|
||||
self.num_envs = num_envs
|
||||
self.env = AsyncVectorEnv([make_env_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
|
||||
self.env = AsyncVectorEnv([make_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
|
||||
|
||||
def __call__(self, params):
|
||||
params = np.atleast_2d(params)
|
||||
|
1
setup.py
1
setup.py
@ -12,6 +12,7 @@ setup(
|
||||
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
||||
'mujoco-py<2.1,>=2.0',
|
||||
'dm_control',  # comma is required: adjacent string literals are concatenated otherwise
|
||||
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld'
|
||||
],
|
||||
|
||||
url='https://github.com/ALRhub/alr_envs/',
|
||||
|
127
test/test_dmc_envs.py
Normal file
127
test/test_dmc_envs.py
Normal file
@ -0,0 +1,127 @@
|
||||
import unittest
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
|
||||
from dm_control import suite, manipulation
|
||||
|
||||
from alr_envs import make
|
||||
|
||||
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
|
||||
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
|
||||
SEED = 1
|
||||
|
||||
|
||||
class TestEnvironments(unittest.TestCase):
    """Smoke and determinism tests for DeepMind Control environments built via `make`."""

    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
        """
        Roll out random actions in a DMC based env in the step based setting.

        The env_id has to be specified as `domain_name-task_name` or
        for manipulation tasks as `manipulation-environment_name`.

        Args:
            env_id: Either `domain_name-task_name` or `manipulation-environment_name`
            iterations: Number of rollout steps to run. Defaults to the episode length of the
                env spec, or to a single step if the spec defines no limit.
            seed: Seed passed to `make`
            render: Render the episode

        Returns:
            Tuple of arrays (observations, rewards, dones). The observations hold one more entry
            than rewards and dones, because the observation available after the final step
            (the reset observation if the episode ended) is appended as well.

        """
        env: gym.Env = make(env_id, seed=seed)
        try:
            rewards = []
            observations = []
            dones = []
            obs = env.reset()
            self._verify_observations(obs, env.observation_space, "reset()")

            length = env.spec.max_episode_steps
            if iterations is None:
                iterations = 1 if length is None else length

            # Keeps the final check well-defined when the loop body never runs (iterations == 0).
            done = False

            # number of samples(multiple environment steps)
            for _ in range(iterations):
                observations.append(obs)

                ac = env.action_space.sample()
                obs, reward, done, _ = env.step(ac)

                self._verify_observations(obs, env.observation_space, "step()")
                self._verify_reward(reward)
                self._verify_done(done)

                rewards.append(reward)
                dones.append(done)

                if render:
                    env.render("human")

                if done:
                    obs = env.reset()

            # assertTrue instead of a bare assert, so the check survives `python -O`.
            self.assertTrue(done, "Done flag is not True after max episode length.")
            observations.append(obs)
        finally:
            # Close the env even when one of the checks above fails.
            env.close()
        return np.array(observations), np.array(rewards), np.array(dones)

    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
        """Check that `obs` is contained in `observation_space`; `obs_type` names its origin."""
        self.assertTrue(observation_space.contains(obs),
                        f"Observation {obs} received from {obs_type} "
                        f"not contained in observation space {observation_space}.")

    def _verify_reward(self, reward):
        """Check that the reward is a float."""
        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")

    def _verify_done(self, done):
        """Check that the done flag is a bool."""
        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")

    def _assert_trajectories_equal(self, traj1, traj2):
        """Compare two `_run_env` results step by step (observations, rewards and dones)."""
        for i, time_step in enumerate(zip(*traj1, *traj2)):
            obs1, rwd1, done1, obs2, rwd2, done2 = time_step
            self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
            self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
            self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")

    def test_dmc_functionality(self):
        """Tests that environments runs without errors using random actions."""
        for env_id in DMC_ENVS:
            with self.subTest(msg=env_id):
                self._run_env(env_id)

    def test_dmc_determinism(self):
        """Tests that identical seeds produce identical trajectories."""
        seed = 0
        # Iterate over two trajectories, which should have the same state and action sequence
        for env_id in DMC_ENVS:
            with self.subTest(msg=env_id):
                traj1 = self._run_env(env_id, seed=seed)
                traj2 = self._run_env(env_id, seed=seed)
                self._assert_trajectories_equal(traj1, traj2)

    def test_manipulation_functionality(self):
        """Tests that environments runs without errors using random actions."""
        for env_id in MANIPULATION_SPECS:
            with self.subTest(msg=env_id):
                self._run_env(env_id)

    def test_manipulation_determinism(self):
        """Tests that identical seeds produce identical trajectories."""
        seed = 0
        # Iterate over two trajectories, which should have the same state and action sequence
        for env_id in MANIPULATION_SPECS:
            with self.subTest(msg=env_id):
                traj1 = self._run_env(env_id, seed=seed)
                traj2 = self._run_env(env_id, seed=seed)
                self._assert_trajectories_equal(traj1, traj2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -4,7 +4,7 @@ import gym
|
||||
import numpy as np
|
||||
|
||||
import alr_envs # noqa
|
||||
from alr_envs.utils.make_env_helpers import make_env
|
||||
from alr_envs.utils.make_env_helpers import make
|
||||
|
||||
ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
|
||||
SEED = 1
|
||||
@ -27,7 +27,7 @@ class TestEnvironments(unittest.TestCase):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
env: gym.Env = make_env(env_id, seed=seed)
|
||||
env: gym.Env = make(env_id, seed=seed)
|
||||
rewards = []
|
||||
observations = []
|
||||
dones = []
|
||||
@ -62,6 +62,7 @@ class TestEnvironments(unittest.TestCase):
|
||||
if done:
|
||||
obs = env.reset()
|
||||
|
||||
assert done, "Done flag is not True after max episode length."
|
||||
observations.append(obs)
|
||||
env.close()
|
||||
del env
|
||||
@ -81,7 +82,6 @@ class TestEnvironments(unittest.TestCase):
|
||||
def test_environment_functionality(self):
|
||||
"""Tests that environments runs without errors using random actions."""
|
||||
for spec in ALL_SPECS:
|
||||
# try:
|
||||
with self.subTest(msg=spec.id):
|
||||
self._run_env(spec.id)
|
||||
|
||||
@ -91,7 +91,6 @@ class TestEnvironments(unittest.TestCase):
|
||||
# Iterate over two trajectories, which should have the same state and action sequence
|
||||
for spec in ALL_SPECS:
|
||||
with self.subTest(msg=spec.id):
|
||||
self._run_env(spec.id)
|
||||
traj1 = self._run_env(spec.id, seed=seed)
|
||||
traj2 = self._run_env(spec.id, seed=seed)
|
||||
for i, time_step in enumerate(zip(*traj1, *traj2)):
|
||||
|
107
test/test_metaworld_envs.py
Normal file
107
test/test_metaworld_envs.py
Normal file
@ -0,0 +1,107 @@
|
||||
import unittest
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
|
||||
from alr_envs import make
|
||||
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
|
||||
|
||||
# Task names with the "-goal-observable" suffix removed.
ALL_ENVS = [env_name.split("-goal-observable")[0] for env_name in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE]
|
||||
SEED = 1
|
||||
|
||||
|
||||
class TestEnvironments(unittest.TestCase):
    """Smoke and determinism tests for MetaWorld environments built via `make`."""

    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
        """
        Roll out random actions in a MetaWorld env in the step based setting.

        Args:
            env_id: MetaWorld task name without the `-goal-observable` suffix
            iterations: Number of rollout steps to run. Defaults to the `max_path_length` of the
                env, or to a single step if that is None.
            seed: Seed passed to `make`
            render: Render the episode

        Returns:
            Tuple of arrays (observations, rewards, dones, actions). The observations hold one more
            entry than the other arrays, because the observation available after the final step
            (the reset observation if the episode ended) is appended as well.

        """
        env: gym.Env = make(env_id, seed=seed)
        try:
            rewards = []
            observations = []
            actions = []
            dones = []
            obs = env.reset()
            self._verify_observations(obs, env.observation_space, "reset()")

            length = env.max_path_length
            if iterations is None:
                iterations = 1 if length is None else length

            # Keeps the final check well-defined when the loop body never runs (iterations == 0).
            done = False

            # number of samples(multiple environment steps)
            for _ in range(iterations):
                observations.append(obs)

                ac = env.action_space.sample()
                actions.append(ac)
                obs, reward, done, _ = env.step(ac)

                self._verify_observations(obs, env.observation_space, "step()")
                self._verify_reward(reward)
                self._verify_done(done)

                rewards.append(reward)
                dones.append(done)

                if render:
                    env.render("human")

                if done:
                    obs = env.reset()

            # assertTrue instead of a bare assert, so the check survives `python -O`.
            self.assertTrue(done, "Done flag is not True after max episode length.")
            observations.append(obs)
        finally:
            # Close the env even when one of the checks above fails.
            env.close()
        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)

    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
        """Check that `obs` is contained in `observation_space`; `obs_type` names its origin."""
        self.assertTrue(observation_space.contains(obs),
                        f"Observation {obs} received from {obs_type} "
                        f"not contained in observation space {observation_space}.")

    def _verify_reward(self, reward):
        """Check that the reward is a float."""
        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")

    def _verify_done(self, done):
        """Check that the done flag is a bool."""
        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")

    # The `test_dmc_*` names are kept unchanged so existing test selections keep working,
    # although the environments under test are the MetaWorld ones listed in ALL_ENVS.
    def test_dmc_functionality(self):
        """Tests that environments runs without errors using random actions."""
        for env_id in ALL_ENVS:
            with self.subTest(msg=env_id):
                self._run_env(env_id)

    def test_dmc_determinism(self):
        """Tests that identical seeds produce identical trajectories."""
        seed = 0
        # Iterate over two trajectories, which should have the same state and action sequence
        for env_id in ALL_ENVS:
            with self.subTest(msg=env_id):
                traj1 = self._run_env(env_id, seed=seed)
                traj2 = self._run_env(env_id, seed=seed)
                for i, time_step in enumerate(zip(*traj1, *traj2)):
                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
                    # The message must be passed by keyword: the third positional
                    # parameter of assertAlmostEqual is `places`, not `msg`.
                    self.assertAlmostEqual(rwd1, rwd2, msg=f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
Reference in New Issue
Block a user