integrated metaworld tasks into the framework

ottofabian 2021-08-19 09:30:54 +02:00
parent a11965827d
commit 9b1ccb3235
17 changed files with 429 additions and 43 deletions


@ -1,12 +1,12 @@
from gym.envs.registration import register
from gym.wrappers import FlattenObservation
from alr_envs import classic_control, dmc, open_ai
from alr_envs import classic_control, dmc, open_ai, meta
from alr_envs.utils.make_env_helpers import make_dmp_env
from alr_envs.utils.make_env_helpers import make_detpmp_env
from alr_envs.utils.make_env_helpers import make_env
from alr_envs.utils.make_env_helpers import make_env_rank
from alr_envs.utils.make_env_helpers import make
from alr_envs.utils.make_env_helpers import make_rank
# Mujoco
@ -305,13 +305,13 @@ register(
# max_episode_steps=1,
kwargs={
"name": f"ball_in_cup-catch",
"time_limit": 1,
"episode_length": 50,
"time_limit": 2,
"episode_length": 100,
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
"mp_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 1,
"duration": 2,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
@ -331,16 +331,16 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": f"ball_in_cup-catch",
"time_limit": 1,
"episode_length": 50,
"time_limit": 2,
"episode_length": 100,
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
"mp_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 1,
"duration": 2,
"width": 0.025,
"policy_type": "motor",
"weights_scale": 0.2,
"weights_scale": 1,
"zero_start": True,
"policy_kwargs": {
"p_gains": 50,
@ -875,6 +875,23 @@ register(
}
)
register(
id='FetchSlideDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "gym.envs.robotics:FetchSlide-v1",
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"policy_type": "position"
}
}
)
register(
id='FetchReachDenseDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
@ -891,3 +908,38 @@ register(
}
}
)
register(
id='FetchReachDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "gym.envs.robotics:FetchReach-v1",
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"policy_type": "position"
}
}
)
register(
id='ButtonPressDetPMP-v2',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "button-press-v2",
"wrappers": [meta.button_press.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 6.25,
"post_traj_time": 0,
"width": 0.025,
"policy_type": "position"
}
}
)

alr_envs/dmc/README.MD Normal file (+3)

@ -0,0 +1,3 @@
# DeepMind Control (DMC) Wrappers
These are the Environment Wrappers for selected [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments, which allow using our Motion Primitive gym interface with them.
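A minimal step based usage sketch (mirroring `example_dmc` from the examples; the motion primitive variants are created the same way with their registered ids):
```python
import alr_envs

# DMC tasks are addressed as "domain_name-task_name" and created via the alr_envs make helper,
# which also takes care of seeding.
env = alr_envs.make("fish-swim", seed=1)

obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
```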


@ -17,7 +17,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
Returns:
"""
env = alr_envs.make_env(env_id, seed)
env = alr_envs.make(env_id, seed)
rewards = 0
obs = env.reset()
print("observation shape:", env.observation_space.shape)


@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
"""
env = alr_envs.make_env(env_id, seed)
env = alr_envs.make(env_id, seed)
rewards = 0
obs = env.reset()
print("Observation shape: ", env.observation_space.shape)
@ -56,7 +56,7 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
Returns: Tuple of (obs, reward, done, info) with type np.ndarray
"""
env = gym.vector.AsyncVectorEnv([alr_envs.make_env_rank(env_id, seed, i) for i in range(n_cpu)])
env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)])
# OR
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])


@ -1,5 +1,4 @@
from alr_envs import MPWrapper
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
import alr_envs
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
@ -16,7 +15,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
"""
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
env = make_env(env_name, seed)
env = alr_envs.make(env_name, seed)
rewards = 0
# env.render(mode=None)
@ -71,7 +70,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
"weights_scale": 50,
"goal_scale": 0.1
}
env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
# This time rendering every trajectory
if render:
@ -113,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [MPWrapper]
wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
mp_kwargs = {
"num_dof": 5,
"num_basis": 5,
@ -125,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
"weights_scale": 50,
"goal_scale": 0.1
}
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP:
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)


@ -1,4 +1,4 @@
from alr_envs.utils.make_env_helpers import make_env
import alr_envs
def example_mp(env_name, seed=1):
@ -13,7 +13,7 @@ def example_mp(env_name, seed=1):
"""
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
env = make_env(env_name, seed)
env = alr_envs.make(env_name, seed)
rewards = 0
obs = env.reset()
@ -29,13 +29,13 @@ def example_mp(env_name, seed=1):
rewards = 0
obs = env.reset()
if __name__ == '__main__':
# DMP - not supported yet
#example_mp("ReacherDetPMP-v2")
# example_mp("ReacherDMP-v2")
# DetProMP
example_mp("ContinuousMountainCarDetPMP-v0")
example_mp("ReacherDetPMP-v2")
example_mp("FetchReachDenseDetPMP-v1")
example_mp("FetchSlideDenseDetPMP-v1")

alr_envs/meta/README.MD Normal file (+26)

@ -0,0 +1,26 @@
# MetaWorld Wrappers
These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments, which allow using our Motion Primitive gym interface with them.
All Metaworld environments share the same 39 dimensional observation space; the tasks differ only in their objective and in the randomized initial observations.
Observations that are unused for a task are zeroed out. E.g. for `Button-Press-v2` the observation mask looks as follows:
```python
return np.hstack([
# Current observation
[False] * 3, # end-effector position
[False] * 1, # normalized gripper open distance
[True] * 3, # main object position
[False] * 4, # main object quaternion
[False] * 3, # secondary object position
[False] * 4, # secondary object quaternion
# Previous observation
[False] * 3, # previous end-effector position
[False] * 1, # previous normalized gripper open distance
[False] * 3, # previous main object position
[False] * 4, # previous main object quaternion
[False] * 3, # previous second object position
[False] * 4, # previous second object quaternion
# Goal
[True] * 3, # goal position
])
```
For other tasks, only the boolean values have to be adjusted accordingly.
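As a usage sketch, the `ButtonPressDetPMP-v2` environment registered in this commit can be created via `alr_envs.make`; for the motion primitive wrappers a single `step()` is assumed here to execute the whole trajectory generated from the passed parameters:
```python
import alr_envs

# Motion primitive variant of the MetaWorld button-press task (registered in this commit).
env = alr_envs.make("ButtonPressDetPMP-v2", seed=1)

obs = env.reset()
# The action is the DetPMP parameter vector; the wrapper rolls out the resulting trajectory.
params = env.action_space.sample()
obs, reward, done, info = env.step(params)
env.close()
```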


@ -0,0 +1 @@
from alr_envs.meta import button_press


@ -0,0 +1,48 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
@property
def active_obs(self):
# This structure is the same for all metaworld environments.
# Only the set of active observations differs between tasks.
return np.hstack([
# Current observation
[False] * 3, # end-effector position
[False] * 1, # normalized gripper open distance
[True] * 3, # main object position
[False] * 4, # main object quaternion
[False] * 3, # secondary object position
[False] * 4, # secondary object quaternion
# Previous observation
# TODO: Include previous values? According to their source they might be wrong for the first iteration.
[False] * 3, # previous end-effector position
[False] * 1, # previous normalized gripper open distance
[False] * 3, # previous main object position
[False] * 4, # previous main object quaternion
[False] * 3, # previous second object position
[False] * 4, # previous second object quaternion
# Goal
[True] * 3, # goal position
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return self.env.physics.named.data.qpos[:]
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.physics.named.data.qvel[:]
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt


@ -0,0 +1,3 @@
# OpenAI Gym Wrappers
These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments, which allow using our Motion Primitive gym interface with them.
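For example, the `FetchReachDetPMP-v1` environment registered in this commit can be created like any other motion primitive env (a minimal sketch):
```python
import alr_envs

# DetPMP variant of the gym FetchReach task (registered in this commit).
env = alr_envs.make("FetchReachDetPMP-v1", seed=1)

obs = env.reset()
# One step executes the trajectory parameterized by the sampled DetPMP weights.
obs, reward, done, info = env.step(env.action_space.sample())
env.close()
```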


@ -4,8 +4,10 @@ from typing import Union
import gym
from gym.envs.registration import register
from alr_envs.utils.make_env_helpers import make
def make(
def make_dmc(
id: str,
seed: int = 1,
visualize_reward: bool = True,


@ -3,21 +3,22 @@ from typing import Iterable, List, Type, Union
import gym
import numpy as np
from gym.envs.registration import EnvSpec
from mp_env_api import MPEnvWrapper
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
"""
TODO: Do we need this?
Generate a callable to create a new gym environment with a given seed.
The rank is added to the seed and can be used for example when using vector environments.
E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
with seeds 123 through 130.
Hence, testing environments should be seeded with a value which is offset by the number of training environments.
Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
Args:
env_id: name of the environment
@ -30,12 +31,12 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, *
"""
def f():
return make_env(env_id, seed + rank, **kwargs)
return make(env_id, seed + rank, **kwargs)
return f if return_callable else f()
def make_env(env_id: str, seed, **kwargs):
def make(env_id: str, seed, **kwargs):
"""
Converts an env_id to an environment with the gym API.
This also works for the DeepMind Control Suite interface wrappers
@ -58,9 +59,26 @@ def make_env(env_id: str, seed, **kwargs):
env.action_space.seed(seed)
env.observation_space.seed(seed)
except gym.error.Error:
# MetaWorld env
import metaworld
if env_id in metaworld.ML1.ENV_NAMES:
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
env._freeze_rand_vec = False
# Manually set spec, as metaworld environments are not registered via gym
env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
env.goal_space.seed(seed)
else:
# DMC
from alr_envs.utils import make
env = make(env_id, seed=seed, **kwargs)
from alr_envs.utils import make_dmc
env = make_dmc(env_id, seed=seed, **kwargs)
assert env.base_step_limit == env.spec.max_episode_steps, \
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
@ -84,7 +102,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
"""
# _env = gym.make(env_id)
_env = make_env(env_id, seed, **kwargs)
_env = make(env_id, seed, **kwargs)
assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
"At least one MPEnvWrapper is required in order to leverage motion primitive environments."
@ -175,7 +193,7 @@ def make_detpmp_env_helper(**kwargs):
def make_contextual_env(env_id, context, seed, rank):
env = make_env(env_id, seed + rank, context=context)
env = make(env_id, seed + rank, context=context)
# env = gym.make(env_id, context=context)
# env.seed(seed + rank)
return lambda: env


@ -3,7 +3,7 @@ from gym.vector.async_vector_env import AsyncVectorEnv
import numpy as np
from _collections import defaultdict
from alr_envs.utils.make_env_helpers import make_env_rank
from alr_envs.utils.make_env_helpers import make_rank
def split_array(ary, size):
@ -54,7 +54,7 @@ class AlrMpEnvSampler:
def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
self.num_envs = num_envs
self.env = AsyncVectorEnv([make_env_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
self.env = AsyncVectorEnv([make_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
def __call__(self, params):
params = np.atleast_2d(params)


@ -12,6 +12,7 @@ setup(
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
'mujoco-py<2.1,>=2.0',
'dm_control',
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld'
],
url='https://github.com/ALRhub/alr_envs/',

test/test_dmc_envs.py Normal file (+127)

@ -0,0 +1,127 @@
import unittest
import gym
import numpy as np
from dm_control import suite, manipulation
from alr_envs import make
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
SEED = 1
class TestEnvironments(unittest.TestCase):
def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
"""
Example for running a DMC based env in the step based setting.
The env_id has to be specified as `domain_name-task_name` or
for manipulation tasks as `manipulation-environment_name`
Args:
env_id: Either `domain_name-task_name` or `manipulation-environment_name`
iterations: Number of rollout steps to run
seed: random seed
render: Render the episode
Returns:
"""
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
dones = []
obs = env.reset()
self._verify_observations(obs, env.observation_space, "reset()")
length = env.spec.max_episode_steps
if iterations is None:
if length is None:
iterations = 1
else:
iterations = length
# number of samples (multiple environment steps)
for i in range(iterations):
observations.append(obs)
ac = env.action_space.sample()
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
self._verify_observations(obs, env.observation_space, "step()")
self._verify_reward(reward)
self._verify_done(done)
rewards.append(reward)
dones.append(done)
if render:
env.render("human")
if done:
obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones)
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
self.assertTrue(observation_space.contains(obs),
f"Observation {obs} received from {obs_type} "
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
def test_dmc_functionality(self):
"""Tests that environments runs without errors using random actions."""
for env_id in DMC_ENVS:
with self.subTest(msg=env_id):
self._run_env(env_id)
def test_dmc_determinism(self):
"""Tests that identical seeds produce identical trajectories."""
seed = 0
# Iterate over two trajectories, which should have the same state and action sequence
for env_id in DMC_ENVS:
with self.subTest(msg=env_id):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
def test_manipulation_functionality(self):
"""Tests that environments runs without errors using random actions."""
for env_id in MANIPULATION_SPECS:
with self.subTest(msg=env_id):
self._run_env(env_id)
def test_manipulation_determinism(self):
"""Tests that identical seeds produce identical trajectories."""
seed = 0
# Iterate over two trajectories, which should have the same state and action sequence
for env_id in MANIPULATION_SPECS:
with self.subTest(msg=env_id):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, obs2, rwd2, done2 = time_step
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
if __name__ == '__main__':
unittest.main()


@ -4,7 +4,7 @@ import gym
import numpy as np
import alr_envs # noqa
from alr_envs.utils.make_env_helpers import make_env
from alr_envs.utils.make_env_helpers import make
ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
SEED = 1
@ -27,7 +27,7 @@ class TestEnvironments(unittest.TestCase):
Returns:
"""
env: gym.Env = make_env(env_id, seed=seed)
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
dones = []
@ -62,6 +62,7 @@ class TestEnvironments(unittest.TestCase):
if done:
obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs)
env.close()
del env
@ -81,7 +82,6 @@ class TestEnvironments(unittest.TestCase):
def test_environment_functionality(self):
"""Tests that environments runs without errors using random actions."""
for spec in ALL_SPECS:
# try:
with self.subTest(msg=spec.id):
self._run_env(spec.id)
@ -91,7 +91,6 @@ class TestEnvironments(unittest.TestCase):
# Iterate over two trajectories, which should have the same state and action sequence
for spec in ALL_SPECS:
with self.subTest(msg=spec.id):
self._run_env(spec.id)
traj1 = self._run_env(spec.id, seed=seed)
traj2 = self._run_env(spec.id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):

test/test_metaworld_envs.py Normal file (+107)

@ -0,0 +1,107 @@
import unittest
import gym
import numpy as np
from alr_envs import make
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
SEED = 1
class TestEnvironments(unittest.TestCase):
def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
"""
Example for running a MetaWorld based env in the step based setting.
The env_id has to be specified as the MetaWorld task name, e.g. `button-press-v2`
Args:
env_id: MetaWorld task name, e.g. `button-press-v2`
iterations: Number of rollout steps to run
seed: random seed
render: Render the episode
Returns:
"""
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
actions = []
dones = []
obs = env.reset()
self._verify_observations(obs, env.observation_space, "reset()")
length = env.max_path_length
if iterations is None:
if length is None:
iterations = 1
else:
iterations = length
# number of samples (multiple environment steps)
for i in range(iterations):
observations.append(obs)
ac = env.action_space.sample()
actions.append(ac)
# ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
obs, reward, done, info = env.step(ac)
self._verify_observations(obs, env.observation_space, "step()")
self._verify_reward(reward)
self._verify_done(done)
rewards.append(reward)
dones.append(done)
if render:
env.render("human")
if done:
obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs)
env.close()
del env
return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
def _verify_observations(self, obs, observation_space, obs_type="reset()"):
self.assertTrue(observation_space.contains(obs),
f"Observation {obs} received from {obs_type} "
f"not contained in observation space {observation_space}.")
def _verify_reward(self, reward):
self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")
def _verify_done(self, done):
self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
def test_metaworld_functionality(self):
"""Tests that environments run without errors using random actions."""
for env_id in ALL_ENVS:
with self.subTest(msg=env_id):
self._run_env(env_id)
def test_metaworld_determinism(self):
"""Tests that identical seeds produce identical trajectories."""
seed = 0
# Iterate over two trajectories, which should have the same state and action sequence
for env_id in ALL_ENVS:
with self.subTest(msg=env_id):
traj1 = self._run_env(env_id, seed=seed)
traj2 = self._run_env(env_id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):
obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
self.assertAlmostEqual(rwd1, rwd2, msg=f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
if __name__ == '__main__':
unittest.main()