diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index 5a92082..5615be4 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -1,12 +1,12 @@ from gym.envs.registration import register from gym.wrappers import FlattenObservation -from alr_envs import classic_control, dmc, open_ai +from alr_envs import classic_control, dmc, open_ai, meta from alr_envs.utils.make_env_helpers import make_dmp_env from alr_envs.utils.make_env_helpers import make_detpmp_env -from alr_envs.utils.make_env_helpers import make_env -from alr_envs.utils.make_env_helpers import make_env_rank +from alr_envs.utils.make_env_helpers import make +from alr_envs.utils.make_env_helpers import make_rank # Mujoco @@ -305,13 +305,13 @@ register( # max_episode_steps=1, kwargs={ "name": f"ball_in_cup-catch", - "time_limit": 1, - "episode_length": 50, + "time_limit": 2, + "episode_length": 100, "wrappers": [dmc.suite.ball_in_cup.MPWrapper], "mp_kwargs": { "num_dof": 2, "num_basis": 5, - "duration": 1, + "duration": 2, "learn_goal": True, "alpha_phase": 2, "bandwidth_factor": 2, @@ -331,16 +331,16 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": f"ball_in_cup-catch", - "time_limit": 1, - "episode_length": 50, + "time_limit": 2, + "episode_length": 100, "wrappers": [dmc.suite.ball_in_cup.MPWrapper], "mp_kwargs": { "num_dof": 2, "num_basis": 5, - "duration": 1, + "duration": 2, "width": 0.025, "policy_type": "motor", - "weights_scale": 0.2, + "weights_scale": 1, "zero_start": True, "policy_kwargs": { "p_gains": 50, @@ -875,6 +875,23 @@ register( } ) +register( + id='FetchSlideDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchSlide-v1", + "wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "position" + } + } +) + register( id='FetchReachDenseDetPMP-v1', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', @@ -891,3 +908,38 @@ register( } } ) + +register( + id='FetchReachDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchReach-v1", + "wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "policy_type": "position" + } + } +) + +register( + id='ButtonPressDetPMP-v2', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "button-press-v2", + "wrappers": [meta.button_press.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 6.25, + "post_traj_time": 0, + "width": 0.025, + "policy_type": "position" + } + } +) + diff --git a/alr_envs/dmc/README.MD b/alr_envs/dmc/README.MD new file mode 100644 index 0000000..f7d7475 --- /dev/null +++ b/alr_envs/dmc/README.MD @@ -0,0 +1,3 @@ +# DeepMind Control (DMC) Wrappers + +These are the Environment Wrappers for selected [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments in order to use our Motion Primitive gym interface with them. 
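+
+A minimal usage sketch (untested here; the step-based `fish-swim` id is taken from `examples_dmc.py`, and DMC tasks follow the `domain_name-task_name` scheme):
+```python
+import alr_envs
+
+# alr_envs.make seeds the environment and falls back to the DMC interface
+# for ids that are not registered with gym.
+env = alr_envs.make("fish-swim", seed=1)
+obs = env.reset()
+obs, reward, done, info = env.step(env.action_space.sample())
+```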
\ No newline at end of file diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py index b29329d..95dd51d 100644 --- a/alr_envs/examples/examples_dmc.py +++ b/alr_envs/examples/examples_dmc.py @@ -17,7 +17,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): Returns: """ - env = alr_envs.make_env(env_id, seed) + env = alr_envs.make(env_id, seed) rewards = 0 obs = env.reset() print("observation shape:", env.observation_space.shape) diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py index 88d79d5..a215a8d 100644 --- a/alr_envs/examples/examples_general.py +++ b/alr_envs/examples/examples_general.py @@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True): """ - env = alr_envs.make_env(env_id, seed) + env = alr_envs.make(env_id, seed) rewards = 0 obs = env.reset() print("Observation shape: ", env.observation_space.shape) @@ -56,7 +56,7 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16 Returns: Tuple of (obs, reward, done, info) with type np.ndarray """ - env = gym.vector.AsyncVectorEnv([alr_envs.make_env_rank(env_id, seed, i) for i in range(n_cpu)]) + env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)]) # OR # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)]) diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index 480b58d..6decdb1 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -1,5 +1,4 @@ -from alr_envs import MPWrapper -from alr_envs.utils.make_env_helpers import make_dmp_env, make_env +import alr_envs def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True): @@ -16,7 +15,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend """ # While in this case gym.make() is possible to use as well, we recommend our custom make env function. # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface. - env = make_env(env_name, seed) + env = alr_envs.make(env_name, seed) rewards = 0 # env.render(mode=None) @@ -71,7 +70,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations= "weights_scale": 50, "goal_scale": 0.1 } - env = make_env(env_name, seed, mp_kwargs=mp_kwargs) + env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs) # This time rendering every trajectory if render: @@ -113,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True): # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # You can also add other gym.Wrappers in case they are needed. 
-    wrappers = [MPWrapper]
+    wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
     mp_kwargs = {
         "num_dof": 5,
         "num_basis": 5,
@@ -125,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
         "weights_scale": 50,
         "goal_scale": 0.1
     }
-    env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
+    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
 
     # OR for a deterministic ProMP:
     # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py
index d001bc8..9f90be5 100644
--- a/alr_envs/examples/examples_open_ai.py
+++ b/alr_envs/examples/examples_open_ai.py
@@ -1,4 +1,4 @@
-from alr_envs.utils.make_env_helpers import make_env
+import alr_envs
 
 
 def example_mp(env_name, seed=1):
@@ -13,7 +13,7 @@ def example_mp(env_name, seed=1):
     """
     # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
-    env = make_env(env_name, seed)
+    env = alr_envs.make(env_name, seed)
 
     rewards = 0
     obs = env.reset()
@@ -29,13 +29,13 @@ def example_mp(env_name, seed=1):
         rewards = 0
         obs = env.reset()
 
+
 if __name__ == '__main__':
     # DMP - not supported yet
-    #example_mp("ReacherDetPMP-v2")
+    # example_mp("ReacherDMP-v2")
 
     # DetProMP
     example_mp("ContinuousMountainCarDetPMP-v0")
     example_mp("ReacherDetPMP-v2")
     example_mp("FetchReachDenseDetPMP-v1")
     example_mp("FetchSlideDenseDetPMP-v1")
-
diff --git a/alr_envs/meta/README.MD b/alr_envs/meta/README.MD
new file mode 100644
index 0000000..c8d9cd1
--- /dev/null
+++ b/alr_envs/meta/README.MD
@@ -0,0 +1,26 @@
+# MetaWorld Wrappers
+
+These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Motion Primitive gym interface with them.
+All Metaworld environments have a 39-dimensional observation space with the same structure. The tasks differ only in their objective and in the initial observations, which are randomized.
+Unused observations are zeroed out. E.g. for `Button-Press-v2` the observation mask looks as follows:
+```python
+    return np.hstack([
+        # Current observation
+        [False] * 3,  # end-effector position
+        [False] * 1,  # normalized gripper open distance
+        [True] * 3,  # main object position
+        [False] * 4,  # main object quaternion
+        [False] * 3,  # secondary object position
+        [False] * 4,  # secondary object quaternion
+        # Previous observation
+        [False] * 3,  # previous end-effector position
+        [False] * 1,  # previous normalized gripper open distance
+        [False] * 3,  # previous main object position
+        [False] * 4,  # previous main object quaternion
+        [False] * 3,  # previous second object position
+        [False] * 4,  # previous second object quaternion
+        # Goal
+        [True] * 3,  # goal position
+    ])
+```
+For other tasks, only the boolean values have to be adjusted accordingly.
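+
+A minimal usage sketch (untested here; both ids come from this PR: the raw metaworld id `button-press-v2` and the motion primitive registration `ButtonPressDetPMP-v2`):
+```python
+import alr_envs
+
+# Step-based metaworld environment, dispatched through alr_envs.make:
+env = alr_envs.make("button-press-v2", seed=1)
+obs = env.reset()
+obs, reward, done, info = env.step(env.action_space.sample())
+
+# Motion primitive version: a single step executes the full trajectory.
+env = alr_envs.make("ButtonPressDetPMP-v2", seed=1)
+obs = env.reset()
+obs, reward, done, info = env.step(env.action_space.sample())
+```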
\ No newline at end of file
diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py
new file mode 100644
index 0000000..ebc4ab2
--- /dev/null
+++ b/alr_envs/meta/__init__.py
@@ -0,0 +1 @@
+from alr_envs.meta import button_press
diff --git a/alr_envs/meta/button_press.py b/alr_envs/meta/button_press.py
new file mode 100644
index 0000000..b552cc4
--- /dev/null
+++ b/alr_envs/meta/button_press.py
@@ -0,0 +1,48 @@
+from typing import Tuple, Union
+
+import numpy as np
+
+from mp_env_api import MPEnvWrapper
+
+
+class MPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # This structure is the same for all metaworld environments.
+        # Only the set of active observations differs between tasks.
+        return np.hstack([
+            # Current observation
+            [False] * 3,  # end-effector position
+            [False] * 1,  # normalized gripper open distance
+            [True] * 3,  # main object position
+            [False] * 4,  # main object quaternion
+            [False] * 3,  # secondary object position
+            [False] * 4,  # secondary object quaternion
+            # Previous observation
+            # TODO: Include previous values? According to their source they might be wrong for the first iteration.
+            [False] * 3,  # previous end-effector position
+            [False] * 1,  # previous normalized gripper open distance
+            [False] * 3,  # previous main object position
+            [False] * 4,  # previous main object quaternion
+            [False] * 3,  # previous second object position
+            [False] * 4,  # previous second object quaternion
+            # Goal
+            [True] * 3,  # goal position
+        ])
+
+    @property
+    def current_pos(self) -> Union[float, int, np.ndarray]:
+        return self.env.sim.data.qpos[:]  # metaworld is mujoco_py based and has no dm_control physics object
+
+    @property
+    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
+        return self.env.sim.data.qvel[:]  # see current_pos: sim.data is the mujoco_py accessor
+
+    @property
+    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
+        raise ValueError("Goal position is not available and has to be learnt based on the environment.")
+
+    @property
+    def dt(self) -> Union[float, int]:
+        return self.env.dt
diff --git a/alr_envs/open_ai/README.MD b/alr_envs/open_ai/README.MD
new file mode 100644
index 0000000..9c30ffe
--- /dev/null
+++ b/alr_envs/open_ai/README.MD
@@ -0,0 +1,3 @@
+# OpenAI Gym Wrappers
+
+These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments in order to use our Motion Primitive gym interface with them.
\ No newline at end of file
diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py
index 758d49f..b811354 100644
--- a/alr_envs/utils/__init__.py
+++ b/alr_envs/utils/__init__.py
@@ -4,8 +4,10 @@ from typing import Union
 import gym
 from gym.envs.registration import register
 
+from alr_envs.utils.make_env_helpers import make
 
-def make(
+
+def make_dmc(
     id: str,
     seed: int = 1,
     visualize_reward: bool = True,
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 0348492..466f7cf 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -3,21 +3,22 @@ from typing import Iterable, List, Type, Union
 
 import gym
 import numpy as np
+from gym.envs.registration import EnvSpec
 
 from mp_env_api import MPEnvWrapper
 from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
 from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
 
 
-def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
+def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
     """
     TODO: Do we need this?
     Generate a callable to create a new gym environment with a given seed.
    The rank is added to the seed and can be used for example when using vector environments.
-    E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
+    E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
     with seeds 123 through 130.
     Hence, testing environments should be seeded with a value which is offset by the number of training environments.
-    Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns
+    Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
 
     Args:
         env_id: name of the environment
@@ -30,12 +31,12 @@
 
     def f():
-        return make_env(env_id, seed + rank, **kwargs)
+        return make(env_id, seed + rank, **kwargs)
 
     return f if return_callable else f()
 
 
-def make_env(env_id: str, seed, **kwargs):
+def make(env_id: str, seed, **kwargs):
     """
     Converts an env_id to an environment with the gym API.
-    This also works for DeepMind Control Suite interface_wrappers
+    This also works for DeepMind Control Suite and Metaworld environments
@@ -58,13 +59,30 @@
         env.action_space.seed(seed)
         env.observation_space.seed(seed)
     except gym.error.Error:
-        # DMC
-        from alr_envs.utils import make
-        env = make(env_id, seed=seed, **kwargs)
-        assert env.base_step_limit == env.spec.max_episode_steps, \
-            f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
-            f"the DMC environment specification of {env.base_step_limit} steps."
+        # MetaWorld env
+        import metaworld
+        if env_id in metaworld.ML1.ENV_NAMES:
+            env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
+            # setting this avoids generating the same initialization after each reset
+            env._freeze_rand_vec = False
+            # Manually set spec, as metaworld environments are not registered via gym
+            env.unwrapped.spec = EnvSpec(env_id)
+            # Set Timelimit based on the maximum allowed path length of the environment
+            env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
+            env.seed(seed)
+            env.action_space.seed(seed)
+            env.observation_space.seed(seed)
+            env.goal_space.seed(seed)
+
+        else:
+            # DMC
+            from alr_envs.utils import make_dmc
+            env = make_dmc(env_id, seed=seed, **kwargs)
+
+            assert env.base_step_limit == env.spec.max_episode_steps, \
+                f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
+                f"the DMC environment specification of {env.base_step_limit} steps."
 
     return env
@@ -84,7 +102,7 @@
     """
     # _env = gym.make(env_id)
-    _env = make_env(env_id, seed, **kwargs)
+    _env = make(env_id, seed, **kwargs)
 
     assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
         "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
@@ -175,7 +193,7 @@ def make_detpmp_env_helper(**kwargs):
 
 
 def make_contextual_env(env_id, context, seed, rank):
-    env = make_env(env_id, seed + rank, context=context)
+    env = make(env_id, seed + rank, context=context)
     # env = gym.make(env_id, context=context)
     # env.seed(seed + rank)
     return lambda: env
diff --git a/alr_envs/utils/mp_env_async_sampler.py b/alr_envs/utils/mp_env_async_sampler.py
index 67a774c..b24e908 100644
--- a/alr_envs/utils/mp_env_async_sampler.py
+++ b/alr_envs/utils/mp_env_async_sampler.py
@@ -3,7 +3,7 @@ from gym.vector.async_vector_env import AsyncVectorEnv
 import numpy as np
 from _collections import defaultdict
 
-from alr_envs.utils.make_env_helpers import make_env_rank
+from alr_envs.utils.make_env_helpers import make_rank
 
 
 def split_array(ary, size):
@@ -54,7 +54,7 @@ class AlrMpEnvSampler:
     def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
         self.num_envs = num_envs
-        self.env = AsyncVectorEnv([make_env_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
+        self.env = AsyncVectorEnv([make_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
 
     def __call__(self, params):
         params = np.atleast_2d(params)
diff --git a/setup.py b/setup.py
index 16374e4..55a1a95 100644
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,7 @@ setup(
         'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
         'mujoco-py<2.1,>=2.0',
-        'dm_control'
+        'dm_control',
+        'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld'
     ],
     url='https://github.com/ALRhub/alr_envs/',
diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py
new file mode 100644
index 0000000..4eb1589
--- /dev/null
+++ b/test/test_dmc_envs.py
@@ -0,0 +1,126 @@
+import unittest
+
+import gym
+import numpy as np
+
+from dm_control import suite, manipulation
+
+from alr_envs import make
+
+DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
+MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
+SEED = 1
+
+
+class TestEnvironments(unittest.TestCase):
+
+    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
+        """
+        Example for running a DMC based env in the step based setting.
+        The env_id has to be specified as `domain_name-task_name` or
+        for manipulation tasks as `manipulation-environment_name`
+
+        Args:
+            env_id: Either `domain_name-task_name` or `manipulation-environment_name`
+            iterations: Number of rollout steps to run
+            seed: Random seed
+            render: Render the episode
+
+        Returns:
+
+        """
+        env: gym.Env = make(env_id, seed=seed)
+        rewards = []
+        observations = []
+        dones = []
+        obs = env.reset()
+        self._verify_observations(obs, env.observation_space, "reset()")
+
+        length = env.spec.max_episode_steps
+        if iterations is None:
+            if length is None:
+                iterations = 1
+            else:
+                iterations = length
+
+        # number of samples (multiple environment steps)
+        for i in range(iterations):
+            observations.append(obs)
+
+            ac = env.action_space.sample()
+            # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
+            obs, reward, done, info = env.step(ac)
+
+            self._verify_observations(obs, env.observation_space, "step()")
+            self._verify_reward(reward)
+            self._verify_done(done)
+
+            rewards.append(reward)
+            dones.append(done)
+
+            if render:
+                env.render("human")
+
+            if done:
+                obs = env.reset()
+
+        assert done, "Done flag is not True after max episode length."
+        observations.append(obs)
+        env.close()
+        del env
+        return np.array(observations), np.array(rewards), np.array(dones)
+
+    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
+        self.assertTrue(observation_space.contains(obs),
+                        f"Observation {obs} received from {obs_type} "
+                        f"not contained in observation space {observation_space}.")
+
+    def _verify_reward(self, reward):
+        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
+
+    def _verify_done(self, done):
+        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
+
+    def test_dmc_functionality(self):
+        """Tests that environments run without errors using random actions."""
+        for env_id in DMC_ENVS:
+            with self.subTest(msg=env_id):
+                self._run_env(env_id)
+
+    def test_dmc_determinism(self):
+        """Tests that identical seeds produce identical trajectories."""
+        seed = 0
+        # Iterate over two trajectories, which should have the same state and action sequence
+        for env_id in DMC_ENVS:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, obs2, rwd2, done2 = time_step
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
+                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+
+    def test_manipulation_functionality(self):
+        """Tests that environments run without errors using random actions."""
+        for env_id in MANIPULATION_SPECS:
+            with self.subTest(msg=env_id):
+                self._run_env(env_id)
+
+    def test_manipulation_determinism(self):
+        """Tests that identical seeds produce identical trajectories."""
+        seed = 0
+        # Iterate over two trajectories, which should have the same state and action sequence
+        for env_id in MANIPULATION_SPECS:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, obs2, rwd2, done2 = time_step
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
+                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_envs.py b/test/test_envs.py
index 2e5bf7e..8c21295 100644
--- a/test/test_envs.py
+++ b/test/test_envs.py
@@ -4,7 +4,7 @@ import gym
 import numpy as np
 
 import alr_envs  # noqa
-from alr_envs.utils.make_env_helpers import make_env
+from alr_envs.utils.make_env_helpers import make
 
 ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
 SEED = 1
@@ -27,7 +27,7 @@ class TestEnvironments(unittest.TestCase):
 
         Returns:
 
         """
-        env: gym.Env = make_env(env_id, seed=seed)
+        env: gym.Env = make(env_id, seed=seed)
         rewards = []
         observations = []
         dones = []
@@ -62,6 +62,7 @@
 
             if done:
                 obs = env.reset()
 
+        assert done, "Done flag is not True after max episode length."
         observations.append(obs)
         env.close()
         del env
@@ -81,7 +82,6 @@ class TestEnvironments(unittest.TestCase):
     def test_environment_functionality(self):
         """Tests that environments runs without errors using random actions."""
         for spec in ALL_SPECS:
-            # try:
             with self.subTest(msg=spec.id):
                 self._run_env(spec.id)
@@ -91,7 +91,6 @@
         # Iterate over two trajectories, which should have the same state and action sequence
         for spec in ALL_SPECS:
             with self.subTest(msg=spec.id):
-                self._run_env(spec.id)
                 traj1 = self._run_env(spec.id, seed=seed)
                 traj2 = self._run_env(spec.id, seed=seed)
                 for i, time_step in enumerate(zip(*traj1, *traj2)):
diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py
new file mode 100644
index 0000000..0a05ffc
--- /dev/null
+++ b/test/test_metaworld_envs.py
@@ -0,0 +1,107 @@
+import unittest
+
+import gym
+import numpy as np
+
+from alr_envs import make
+from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
+
+ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
+SEED = 1
+
+
+class TestEnvironments(unittest.TestCase):
+
+    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
+        """
+        Example for running a metaworld based env in the step based setting.
+        The env_id has to be specified as given by metaworld,
+        e.g. `button-press-v2`.
+
+        Args:
+            env_id: metaworld env id, e.g. `button-press-v2`
+            iterations: Number of rollout steps to run
+            seed: Random seed
+            render: Render the episode
+
+        Returns:
+
+        """
+        env: gym.Env = make(env_id, seed=seed)
+        rewards = []
+        observations = []
+        actions = []
+        dones = []
+        obs = env.reset()
+        self._verify_observations(obs, env.observation_space, "reset()")
+
+        length = env.max_path_length
+        if iterations is None:
+            if length is None:
+                iterations = 1
+            else:
+                iterations = length
+
+        # number of samples (multiple environment steps)
+        for i in range(iterations):
+            observations.append(obs)
+
+            ac = env.action_space.sample()
+            actions.append(ac)
+            # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
+            obs, reward, done, info = env.step(ac)
+
+            self._verify_observations(obs, env.observation_space, "step()")
+            self._verify_reward(reward)
+            self._verify_done(done)
+
+            rewards.append(reward)
+            dones.append(done)
+
+            if render:
+                env.render("human")
+
+            if done:
+                obs = env.reset()
+
+        assert done, "Done flag is not True after max episode length."
+        observations.append(obs)
+        env.close()
+        del env
+        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
+
+    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
+        self.assertTrue(observation_space.contains(obs),
+                        f"Observation {obs} received from {obs_type} "
+                        f"not contained in observation space {observation_space}.")
+
+    def _verify_reward(self, reward):
+        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
+
+    def _verify_done(self, done):
+        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
+
+    def test_metaworld_functionality(self):
+        """Tests that environments run without errors using random actions."""
+        for env_id in ALL_ENVS:
+            with self.subTest(msg=env_id):
+                self._run_env(env_id)
+
+    def test_metaworld_determinism(self):
+        """Tests that identical seeds produce identical trajectories."""
+        seed = 0
+        # Iterate over two trajectories, which should have the same state and action sequence
+        for env_id in ALL_ENVS:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
+                    self.assertAlmostEqual(rwd1, rwd2, msg=f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+
+
+if __name__ == '__main__':
+    unittest.main()
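As a usage sketch for the renamed helpers (not part of the diff; the `alr_envs:HoleReacher-v0` id is taken from `examples_general.py`), `make_rank` plugs directly into gym's vector API:

```python
import gym
import alr_envs

# make_rank returns a callable by default, so the environments are only
# constructed inside the worker processes of the AsyncVectorEnv.
envs = gym.vector.AsyncVectorEnv(
    [alr_envs.make_rank("alr_envs:HoleReacher-v0", seed=123, rank=i) for i in range(4)]
)
obs = envs.reset()
```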