Adapted test to new gym interface
In previous gym versions, executing a step returned obs, reward, done, info = env.step(...). With the switch to gymnasium this has changed to obs, reward, terminated, truncated, info = env.step(...). We also made the code a bit more self-explanatory.
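For reference, this is the step-handling pattern the tests below now follow; a minimal sketch against plain gymnasium (the "CartPole-v1" id is only an illustration, not part of this repository):

    # Minimal before/after sketch of the step API change.
    import gymnasium as gym

    env = gym.make("CartPole-v1")  # illustrative environment only

    # Old gym API (pre-gymnasium):
    #   obs, reward, done, info = env.step(action)

    # New gymnasium API:
    obs, info = env.reset(seed=1)
    done = False
    while not done:
        obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        # combine the two flags where the old code used a single `done`
        done = terminated or truncated
    env.close()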
This commit is contained in:
parent
228e343a1b
commit
a559f92562
@@ -94,11 +94,13 @@ def test_verbosity(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]
{'phase_generator_type': 'exp'},
{'basis_generator_type': basis_generator_type})
env.reset()
-info_keys = list(env.step(env.action_space.sample())[3].keys())
+_obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample())
+info_keys = list(info.keys())

env_step = fancy_gym.make(env_id, SEED)
env_step.reset()
-info_keys_step = env_step.step(env_step.action_space.sample())[3].keys()
+_obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample())
+info_keys_step = info.keys()

assert all(e in info_keys for e in info_keys_step)
assert 'trajectory_length' in info_keys
@@ -122,7 +124,8 @@ def test_length(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWrapper]]):

for _ in range(5):
env.reset()
-length = env.step(env.action_space.sample())[3]['trajectory_length']
+_obs, _reward, _terminated, _truncated, info = env.step(env.action_space.sample())
+length = info['trajectory_length']

assert length == env.spec.max_episode_steps

@@ -138,7 +141,8 @@ def test_aggregation(mp_type: str, reward_aggregation: Callable[[np.ndarray], fl
{'basis_generator_type': basis_generator_type})
env.reset()
# ToyEnv only returns 1 as reward
-assert env.step(env.action_space.sample())[1] == reward_aggregation(np.ones(50, ))
+_obs, reward, _terminated, _truncated, _info = env.step(env.action_space.sample())
+assert reward == reward_aggregation(np.ones(50, ))


@pytest.mark.parametrize('mp_type', ['promp', 'dmp'])
@@ -250,6 +254,8 @@ def test_learn_tau(mp_type: str, tau: float):
assert np.all(vel[:tau_time_steps - 2] != vel[-1])
+#
+#


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('delay', [0, 0.25, 0.5, 0.75])
def test_learn_delay(mp_type: str, delay: float):
@@ -292,6 +298,8 @@ def test_learn_delay(mp_type: str, delay: float):
assert np.all(vel[max(1, delay_time_steps)] != vel[0])
+#
+#


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('tau', [0.25, 0.5, 0.75, 1])
@pytest.mark.parametrize('delay', [0.25, 0.5, 0.75, 1])
@@ -312,15 +320,16 @@ def test_learn_tau_and_delay(mp_type: str, tau: float, delay: float):
if env.spec.max_episode_steps * env.dt < delay + tau:
return

-d = True
+done = True
for i in range(5):
-if d:
+if done:
env.reset()
action = env.action_space.sample()
action[0] = tau
action[1] = delay

-obs, r, d, info = env.step(action)
+_obs, _reward, terminated, truncated, info = env.step(action)
+done = terminated or truncated

length = info['trajectory_length']
assert length == env.spec.max_episode_steps
@@ -8,11 +8,7 @@ from test.utils import run_env, run_env_determinism

METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in
ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
-<<<<<<< HEAD
-METAWORLD_MP_IDS = chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values())
-=======
METAWORLD_MP_IDS = list(chain(*fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values()))
->>>>>>> 47-update-to-new-gym-api
SEED = 1


@@ -79,13 +79,14 @@ def test_learn_sub_trajectories(mp_type: str, env_wrap: Tuple[str, Type[RawInter
# This also verifies we are not adding the TimeAwareObservationWrapper twice
assert env.observation_space == env_step.observation_space

-d = True
+done = True

for i in range(25):
-if d:
+if done:
env.reset()
action = env.action_space.sample()
-obs, r, d, info = env.step(action)
+_obs, _reward, terminated, truncated, info = env.step(action)
+done = terminated or truncated

length = info['trajectory_length']

@@ -112,7 +113,7 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra
if add_time_aware_wrapper_before:
wrappers += [TimeAwareObservation]

-replanning_schedule = lambda c_pos, c_vel, obs, c_action, t: t % replanning_time == 0
+def replanning_schedule(c_pos, c_vel, obs, c_action, t): return t % replanning_time == 0

basis_generator_type = 'prodmp' if mp_type == 'prodmp' else 'rbf'
phase_generator_type = 'exp' if 'dmp' in mp_type else 'linear'
@@ -134,18 +135,20 @@ def test_replanning_time(mp_type: str, env_wrap: Tuple[str, Type[RawInterfaceWra
# Make 3 episodes, total steps depend on the replanning steps
for i in range(3 * episode_steps):
action = env.action_space.sample()
-obs, r, d, info = env.step(action)
+_obs, _reward, terminated, truncated, info = env.step(action)
+done = terminated or truncated

length = info['trajectory_length']

-if d:
+if done:
# Check if number of steps until termination match the replanning interval
-print(d, (i + 1), episode_steps)
+print(done, (i + 1), episode_steps)
assert (i + 1) % episode_steps == 0
env.reset()

assert replanning_schedule(None, None, None, None, length)


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
@pytest.mark.parametrize('sub_segment_steps', [5, 10])
@@ -167,13 +170,16 @@ def test_max_planning_times(mp_type: str, max_planning_times: int, sub_segment_s
},
seed=SEED)
_ = env.reset()
-d = False
+done = False
planning_times = 0
-while not d:
-_, _, d, _ = env.step(env.action_space.sample())
+while not done:
+action = env.action_space.sample()
+_obs, _reward, terminated, truncated, _info = env.step(action)
+done = terminated or truncated
planning_times += 1
assert planning_times == max_planning_times


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
@pytest.mark.parametrize('sub_segment_steps', [5, 10])
@@ -196,15 +202,17 @@ def test_replanning_with_learn_tau(mp_type: str, max_planning_times: int, sub_se
},
seed=SEED)
_ = env.reset()
-d = False
+done = False
planning_times = 0
-while not d:
+while not done:
action = env.action_space.sample()
action[0] = tau
-_, _, d, info = env.step(action)
+_obs, _reward, terminated, truncated, _info = env.step(action)
+done = terminated or truncated
planning_times += 1
assert planning_times == max_planning_times


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
@pytest.mark.parametrize('sub_segment_steps', [5, 10])
@@ -227,12 +235,13 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_
},
seed=SEED)
_ = env.reset()
-d = False
+done = False
planning_times = 0
-while not d:
+while not done:
action = env.action_space.sample()
action[0] = delay
-_, _, d, info = env.step(action)
+_obs, _reward, terminated, truncated, info = env.step(action)
+done = terminated or truncated

delay_time_steps = int(np.round(delay / env.dt))
pos = info['positions'].flatten()
@@ -256,6 +265,7 @@ def test_replanning_with_learn_delay(mp_type: str, max_planning_times: int, sub_

assert planning_times == max_planning_times


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('max_planning_times', [1, 2, 3])
@pytest.mark.parametrize('sub_segment_steps', [5, 10, 15])
@@ -280,13 +290,14 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i
},
seed=SEED)
_ = env.reset()
-d = False
+done = False
planning_times = 0
-while not d:
+while not done:
action = env.action_space.sample()
action[0] = tau
action[1] = delay
-_, _, d, info = env.step(action)
+_obs, _reward, terminated, truncated, info = env.step(action)
+done = terminated or truncated

delay_time_steps = int(np.round(delay / env.dt))

@@ -306,6 +317,7 @@ def test_replanning_with_learn_delay_and_tau(mp_type: str, max_planning_times: i

assert planning_times == max_planning_times


@pytest.mark.parametrize('mp_type', ['promp', 'prodmp'])
@pytest.mark.parametrize('max_planning_times', [1, 2, 3, 4])
@pytest.mark.parametrize('sub_segment_steps', [5, 10])
@@ -327,7 +339,8 @@ def test_replanning_schedule(mp_type: str, max_planning_times: int, sub_segment_
},
seed=SEED)
_ = env.reset()
-d = False
for i in range(max_planning_times):
-_, _, d, _ = env.step(env.action_space.sample())
-assert d
+action = env.action_space.sample()
+_obs, _reward, terminated, truncated, _info = env.step(action)
+done = terminated or truncated
+assert done