Integrated MetaWorld tasks into the framework

This commit is contained in:
ottofabian 2021-08-19 09:30:54 +02:00
parent a11965827d
commit 9b1ccb3235
17 changed files with 429 additions and 43 deletions

View File

@ -1,12 +1,12 @@
from gym.envs.registration import register from gym.envs.registration import register
from gym.wrappers import FlattenObservation from gym.wrappers import FlattenObservation
from alr_envs import classic_control, dmc, open_ai from alr_envs import classic_control, dmc, open_ai, meta
from alr_envs.utils.make_env_helpers import make_dmp_env from alr_envs.utils.make_env_helpers import make_dmp_env
from alr_envs.utils.make_env_helpers import make_detpmp_env from alr_envs.utils.make_env_helpers import make_detpmp_env
from alr_envs.utils.make_env_helpers import make_env from alr_envs.utils.make_env_helpers import make
from alr_envs.utils.make_env_helpers import make_env_rank from alr_envs.utils.make_env_helpers import make_rank
# Mujoco # Mujoco
@ -305,13 +305,13 @@ register(
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs={
"name": f"ball_in_cup-catch", "name": f"ball_in_cup-catch",
"time_limit": 1, "time_limit": 2,
"episode_length": 50, "episode_length": 100,
"wrappers": [dmc.suite.ball_in_cup.MPWrapper], "wrappers": [dmc.suite.ball_in_cup.MPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 2, "num_dof": 2,
"num_basis": 5, "num_basis": 5,
"duration": 1, "duration": 2,
"learn_goal": True, "learn_goal": True,
"alpha_phase": 2, "alpha_phase": 2,
"bandwidth_factor": 2, "bandwidth_factor": 2,
@ -331,16 +331,16 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={ kwargs={
"name": f"ball_in_cup-catch", "name": f"ball_in_cup-catch",
"time_limit": 1, "time_limit": 2,
"episode_length": 50, "episode_length": 100,
"wrappers": [dmc.suite.ball_in_cup.MPWrapper], "wrappers": [dmc.suite.ball_in_cup.MPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 2, "num_dof": 2,
"num_basis": 5, "num_basis": 5,
"duration": 1, "duration": 2,
"width": 0.025, "width": 0.025,
"policy_type": "motor", "policy_type": "motor",
"weights_scale": 0.2, "weights_scale": 1,
"zero_start": True, "zero_start": True,
"policy_kwargs": { "policy_kwargs": {
"p_gains": 50, "p_gains": 50,
@ -875,6 +875,23 @@ register(
} }
) )
register(
id='FetchSlideDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "gym.envs.robotics:FetchSlide-v1",
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"policy_type": "position"
}
}
)
register( register(
id='FetchReachDenseDetPMP-v1', id='FetchReachDenseDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
@ -891,3 +908,38 @@ register(
} }
} }
) )
register(
id='FetchReachDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "gym.envs.robotics:FetchReach-v1",
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"policy_type": "position"
}
}
)
register(
id='ButtonPressDetPMP-v2',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "button-press-v2",
"wrappers": [meta.button_press.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 6.25,
"post_traj_time": 0,
"width": 0.025,
"policy_type": "position"
}
}
)

3
alr_envs/dmc/README.MD Normal file
View File

@ -0,0 +1,3 @@
# DeepMind Control (DMC) Wrappers
These are the environment wrappers for selected [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments, allowing them to be used with our Motion Primitive gym interface.
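A minimal usage sketch, based on the step-based example in this repository (`fish-swim` follows the `domain_name-task_name` convention; motion-primitive variants are registered in `alr_envs/__init__.py`):
```python
import alr_envs

# Create a DMC task through the unified make function; seeding is handled internally.
env = alr_envs.make("fish-swim", seed=1)

obs = env.reset()
for _ in range(200):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()
```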

View File

@ -17,7 +17,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
Returns: Returns:
""" """
env = alr_envs.make_env(env_id, seed) env = alr_envs.make(env_id, seed)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
print("observation shape:", env.observation_space.shape) print("observation shape:", env.observation_space.shape)

View File

@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
""" """
env = alr_envs.make_env(env_id, seed) env = alr_envs.make(env_id, seed)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
print("Observation shape: ", env.observation_space.shape) print("Observation shape: ", env.observation_space.shape)
@ -56,7 +56,7 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
Returns: Tuple of (obs, reward, done, info) with type np.ndarray Returns: Tuple of (obs, reward, done, info) with type np.ndarray
""" """
env = gym.vector.AsyncVectorEnv([alr_envs.make_env_rank(env_id, seed, i) for i in range(n_cpu)]) env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)])
# OR # OR
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)]) # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])

View File

@ -1,5 +1,4 @@
from alr_envs import MPWrapper import alr_envs
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True): def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
@ -16,7 +15,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
""" """
# While in this case gym.make() is possible to use as well, we recommend our custom make env function. # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface. # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
env = make_env(env_name, seed) env = alr_envs.make(env_name, seed)
rewards = 0 rewards = 0
# env.render(mode=None) # env.render(mode=None)
@ -71,7 +70,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
"weights_scale": 50, "weights_scale": 50,
"goal_scale": 0.1 "goal_scale": 0.1
} }
env = make_env(env_name, seed, mp_kwargs=mp_kwargs) env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
# This time rendering every trajectory # This time rendering every trajectory
if render: if render:
@ -113,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed. # You can also add other gym.Wrappers in case they are needed.
wrappers = [MPWrapper] wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
mp_kwargs = { mp_kwargs = {
"num_dof": 5, "num_dof": 5,
"num_basis": 5, "num_basis": 5,
@ -125,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
"weights_scale": 50, "weights_scale": 50,
"goal_scale": 0.1 "goal_scale": 0.1
} }
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP: # OR for a deterministic ProMP:
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) # env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)

View File

@ -1,4 +1,4 @@
from alr_envs.utils.make_env_helpers import make_env import alr_envs
def example_mp(env_name, seed=1): def example_mp(env_name, seed=1):
@ -13,7 +13,7 @@ def example_mp(env_name, seed=1):
""" """
# While in this case gym.make() is possible to use as well, we recommend our custom make env function. # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
env = make_env(env_name, seed) env = alr_envs.make(env_name, seed)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
@ -29,13 +29,13 @@ def example_mp(env_name, seed=1):
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
if __name__ == '__main__': if __name__ == '__main__':
# DMP - not supported yet # DMP - not supported yet
#example_mp("ReacherDetPMP-v2") # example_mp("ReacherDMP-v2")
# DetProMP # DetProMP
example_mp("ContinuousMountainCarDetPMP-v0") example_mp("ContinuousMountainCarDetPMP-v0")
example_mp("ReacherDetPMP-v2") example_mp("ReacherDetPMP-v2")
example_mp("FetchReachDenseDetPMP-v1") example_mp("FetchReachDenseDetPMP-v1")
example_mp("FetchSlideDenseDetPMP-v1") example_mp("FetchSlideDenseDetPMP-v1")

26
alr_envs/meta/README.MD Normal file
View File

@ -0,0 +1,26 @@
# MetaWorld Wrappers
These are the environment wrappers for selected [Metaworld](https://meta-world.github.io/) environments, allowing them to be used with our Motion Primitive gym interface.
All Metaworld environments share the same 39-dimensional observation space structure. The tasks differ only in their objective and in the randomized initial observations.
Unused observations are zeroed out. For example, for `button-press-v2` the observation mask looks as follows:
```python
return np.hstack([
# Current observation
[False] * 3, # end-effector position
[False] * 1, # normalized gripper open distance
[True] * 3, # main object position
[False] * 4, # main object quaternion
[False] * 3, # secondary object position
[False] * 4, # secondary object quaternion
# Previous observation
[False] * 3, # previous end-effector position
[False] * 1, # previous normalized gripper open distance
[False] * 3, # previous main object position
[False] * 4, # previous main object quaternion
[False] * 3, # previous second object position
[False] * 4, # previous second object quaternion
# Goal
[True] * 3, # goal position
])
```
For other tasks, only the boolean values need to be adjusted accordingly.
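As a sketch, a reach-style task could instead expose the end-effector position rather than the object position (the flags below are an assumption for illustration, not taken from the repository, and would need to be checked against the task's objective):
```python
import numpy as np

# Hypothetical mask for a reach-style task: the end-effector position drives
# the objective, so the object-related entries are masked out.
active_obs = np.hstack([
    # Current observation
    [True] * 3,    # end-effector position
    [False] * 1,   # normalized gripper open distance
    [False] * 3,   # main object position
    [False] * 4,   # main object quaternion
    [False] * 3,   # secondary object position
    [False] * 4,   # secondary object quaternion
    # Previous observation (same layout as above, masked out entirely)
    [False] * 18,
    # Goal
    [True] * 3,    # goal position
])
assert active_obs.shape == (39,)  # matches the shared observation structure
```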

View File

@ -0,0 +1 @@
from alr_envs.meta import button_press

View File

@ -0,0 +1,48 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
# This structure is the same for all metaworld environments.
# Only the active (non-masked) observations differ between tasks.
return np.hstack([
# Current observation
[False] * 3, # end-effector position
[False] * 1, # normalized gripper open distance
[True] * 3, # main object position
[False] * 4, # main object quaternion
[False] * 3, # secondary object position
[False] * 4, # secondary object quaternion
# Previous observation
# TODO: Include previous values? According to their source they might be wrong for the first iteration.
[False] * 3, # previous end-effector position
[False] * 1, # previous normalized gripper open distance
[False] * 3, # previous main object position
[False] * 4, # previous main object quaternion
[False] * 3, # previous second object position
[False] * 4, # previous second object quaternion
# Goal
[True] * 3, # goal position
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.physics.named.data.qpos[:]
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.physics.named.data.qvel[:]
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -0,0 +1,3 @@
# OpenAI Gym Wrappers
These are the environment wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments, allowing them to be used with our Motion Primitive gym interface.
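A minimal usage sketch, using the `FetchReachDetPMP-v1` ID registered in `alr_envs/__init__.py`:
```python
import alr_envs

# Motion-primitive variant of FetchReach: the action is a set of MP parameters
# and a single step() call rolls out the resulting trajectory.
env = alr_envs.make("FetchReachDetPMP-v1", seed=1)

obs = env.reset()
params = env.action_space.sample()  # MP parameters instead of low-level actions
obs, reward, done, info = env.step(params)
env.close()
```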

View File

@ -4,8 +4,10 @@ from typing import Union
import gym import gym
from gym.envs.registration import register from gym.envs.registration import register
from alr_envs.utils.make_env_helpers import make
def make(
def make_dmc(
id: str, id: str,
seed: int = 1, seed: int = 1,
visualize_reward: bool = True, visualize_reward: bool = True,

View File

@ -3,21 +3,22 @@ from typing import Iterable, List, Type, Union
import gym import gym
import numpy as np import numpy as np
from gym.envs.registration import EnvSpec
from mp_env_api import MPEnvWrapper from mp_env_api import MPEnvWrapper
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
""" """
TODO: Do we need this? TODO: Do we need this?
Generate a callable to create a new gym environment with a given seed. Generate a callable to create a new gym environment with a given seed.
The rank is added to the seed and can be used for example when using vector environments. The rank is added to the seed and can be used for example when using vector environments.
E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
with seeds 123 through 130. with seeds 123 through 130.
Hence, testing environments should be seeded with a value which is offset by the number of training environments. Hence, testing environments should be seeded with a value which is offset by the number of training environments.
Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
Args: Args:
env_id: name of the environment env_id: name of the environment
@ -30,12 +31,12 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, *
""" """
def f(): def f():
return make_env(env_id, seed + rank, **kwargs) return make(env_id, seed + rank, **kwargs)
return f if return_callable else f() return f if return_callable else f()
def make_env(env_id: str, seed, **kwargs): def make(env_id: str, seed, **kwargs):
""" """
Converts an env_id to an environment with the gym API. Converts an env_id to an environment with the gym API.
This also works for DeepMind Control Suite interface_wrappers This also works for DeepMind Control Suite interface_wrappers
@ -58,9 +59,26 @@ def make_env(env_id: str, seed, **kwargs):
env.action_space.seed(seed) env.action_space.seed(seed)
env.observation_space.seed(seed) env.observation_space.seed(seed)
except gym.error.Error: except gym.error.Error:
# MetaWorld env
import metaworld
if env_id in metaworld.ML1.ENV_NAMES:
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
env._freeze_rand_vec = False
# Manually set spec, as metaworld environments are not registered via gym
env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
env.goal_space.seed(seed)
else:
# DMC # DMC
from alr_envs.utils import make from alr_envs.utils import make_dmc
env = make(env_id, seed=seed, **kwargs) env = make_dmc(env_id, seed=seed, **kwargs)
assert env.base_step_limit == env.spec.max_episode_steps, \ assert env.base_step_limit == env.spec.max_episode_steps, \
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \ f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
@ -84,7 +102,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
""" """
# _env = gym.make(env_id) # _env = gym.make(env_id)
_env = make_env(env_id, seed, **kwargs) _env = make(env_id, seed, **kwargs)
assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \ assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
"At least one MPEnvWrapper is required in order to leverage motion primitive environments." "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
@ -175,7 +193,7 @@ def make_detpmp_env_helper(**kwargs):
def make_contextual_env(env_id, context, seed, rank): def make_contextual_env(env_id, context, seed, rank):
env = make_env(env_id, seed + rank, context=context) env = make(env_id, seed + rank, context=context)
# env = gym.make(env_id, context=context) # env = gym.make(env_id, context=context)
# env.seed(seed + rank) # env.seed(seed + rank)
return lambda: env return lambda: env

View File

@ -3,7 +3,7 @@ from gym.vector.async_vector_env import AsyncVectorEnv
import numpy as np import numpy as np
from _collections import defaultdict from _collections import defaultdict
from alr_envs.utils.make_env_helpers import make_env_rank from alr_envs.utils.make_env_helpers import make_rank
def split_array(ary, size): def split_array(ary, size):
@ -54,7 +54,7 @@ class AlrMpEnvSampler:
def __init__(self, env_id, num_envs, seed=0, **env_kwargs): def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
self.num_envs = num_envs self.num_envs = num_envs
self.env = AsyncVectorEnv([make_env_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)]) self.env = AsyncVectorEnv([make_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
def __call__(self, params): def __call__(self, params):
params = np.atleast_2d(params) params = np.atleast_2d(params)

View File

@ -12,6 +12,7 @@ setup(
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git', 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
'mujoco-py<2.1,>=2.0', 'mujoco-py<2.1,>=2.0',
'dm_control' 'dm_control',
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld'
], ],
url='https://github.com/ALRhub/alr_envs/', url='https://github.com/ALRhub/alr_envs/',

127
test/test_dmc_envs.py Normal file
View File

@ -0,0 +1,127 @@
import unittest
import gym
import numpy as np
from dm_control import suite, manipulation
from alr_envs import make
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
SEED = 1
class TestEnvironments(unittest.TestCase):
def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
"""
Example for running a DMC based env in the step based setting.
The env_id has to be specified as `domain_name-task_name` or
for manipulation tasks as `manipulation-environment_name`
Args:
env_id: Either `domain_name-task_name` or `manipulation-environment_name`
iterations: Number of rollout steps to run
seed: random seed
render: Render the episode
Returns:
"""
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
dones = []
obs = env.reset()
self._verify_observations(obs, env.observation_space, "reset()")
length = env.spec.max_episode_steps
if iterations is None:
if length is None:
iterations = 1
else:
iterations = length
# number of samples (multiple environment steps)
for i in range(iterations):
observations.append(obs)
ac = env.action_space.sample()
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
self._verify_observations(obs, env.observation_space, "step()")
self._verify_reward(reward)
self._verify_done(done)
rewards.append(reward)
dones.append(done)
if render:
env.render("human")
if done:
obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones)
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
self.assertTrue(observation_space.contains(obs),
f"Observation {obs} received from {obs_type} "
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
def test_dmc_functionality(self):
"""Tests that environments runs without errors using random actions."""
for env_id in DMC_ENVS:
with self.subTest(msg=env_id):
self._run_env(env_id)
def test_dmc_determinism(self):
"""Tests that identical seeds produce identical trajectories."""
seed = 0
# Iterate over two trajectories, which should have the same state and action sequence
for env_id in DMC_ENVS:
with self.subTest(msg=env_id):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
def test_manipulation_functionality(self):
"""Tests that environments runs without errors using random actions."""
for env_id in MANIPULATION_SPECS:
with self.subTest(msg=env_id):
self._run_env(env_id)
def test_manipulation_determinism(self):
"""Tests that identical seeds produce identical trajectories."""
seed = 0
# Iterate over two trajectories, which should have the same state and action sequence
for env_id in MANIPULATION_SPECS:
with self.subTest(msg=env_id):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
if __name__ == '__main__':
unittest.main()

View File

@ -4,7 +4,7 @@ import gym
import numpy as np import numpy as np
import alr_envs # noqa import alr_envs # noqa
from alr_envs.utils.make_env_helpers import make_env from alr_envs.utils.make_env_helpers import make
ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point) ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
SEED = 1 SEED = 1
@ -27,7 +27,7 @@ class TestEnvironments(unittest.TestCase):
Returns: Returns:
""" """
env: gym.Env = make_env(env_id, seed=seed) env: gym.Env = make(env_id, seed=seed)
rewards = [] rewards = []
observations = [] observations = []
dones = [] dones = []
@ -62,6 +62,7 @@ class TestEnvironments(unittest.TestCase):
if done: if done:
obs = env.reset() obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs) observations.append(obs)
env.close() env.close()
del env del env
@ -81,7 +82,6 @@ class TestEnvironments(unittest.TestCase):
def test_environment_functionality(self): def test_environment_functionality(self):
"""Tests that environments runs without errors using random actions.""" """Tests that environments runs without errors using random actions."""
for spec in ALL_SPECS: for spec in ALL_SPECS:
# try:
with self.subTest(msg=spec.id): with self.subTest(msg=spec.id):
self._run_env(spec.id) self._run_env(spec.id)
@ -91,7 +91,6 @@ class TestEnvironments(unittest.TestCase):
# Iterate over two trajectories, which should have the same state and action sequence # Iterate over two trajectories, which should have the same state and action sequence
for spec in ALL_SPECS: for spec in ALL_SPECS:
with self.subTest(msg=spec.id): with self.subTest(msg=spec.id):
self._run_env(spec.id)
traj1 = self._run_env(spec.id, seed=seed) traj1 = self._run_env(spec.id, seed=seed)
traj2 = self._run_env(spec.id, seed=seed) traj2 = self._run_env(spec.id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)): for i, time_step in enumerate(zip(*traj1, *traj2)):

107
test/test_metaworld_envs.py Normal file
View File

@ -0,0 +1,107 @@
import unittest
import gym
import numpy as np
from alr_envs import make
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
SEED = 1
class TestEnvironments(unittest.TestCase):
def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
"""
Example for running a MetaWorld based env in the step based setting.
The env_id has to be specified as the MetaWorld task name, e.g. `button-press-v2`.
Args:
env_id: MetaWorld task name, e.g. `button-press-v2`
iterations: Number of rollout steps to run
seed: random seed
render: Render the episode
Returns:
"""
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
actions = []
dones = []
obs = env.reset()
self._verify_observations(obs, env.observation_space, "reset()")
length = env.max_path_length
if iterations is None:
if length is None:
iterations = 1
else:
iterations = length
# number of samples (multiple environment steps)
for i in range(iterations):
observations.append(obs)
ac = env.action_space.sample()
actions.append(ac)
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
self._verify_observations(obs, env.observation_space, "step()")
self._verify_reward(reward)
self._verify_done(done)
rewards.append(reward)
dones.append(done)
if render:
env.render("human")
if done:
obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
self.assertTrue(observation_space.contains(obs),
f"Observation {obs} received from {obs_type} "
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
def test_metaworld_functionality(self):
"""Tests that environments run without errors using random actions."""
for env_id in ALL_ENVS:
with self.subTest(msg=env_id):
self._run_env(env_id)
def test_metaworld_determinism(self):
"""Tests that identical seeds produce identical trajectories."""
seed = 0
# Iterate over two trajectories, which should have the same state and action sequence
for env_id in ALL_ENVS:
with self.subTest(msg=env_id):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertAlmostEqual(rwd1, rwd2, msg=f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
if __name__ == '__main__':
unittest.main()