Merge remote-tracking branch 'origin/clean_api' into clean_api

2022-07-12 10:37:54 +02:00 · 2022-07-12 10:37:54 +02:00 · da49d1b7f7
commit da49d1b7f7
parent b3713a82a4 8dba7f199b
3 changed files with 121 additions and 59 deletions
--- a/alr_envs/black_box/time_aware_observation.py
+++ b/alr_envs/black_box/time_aware_observation.py
@ -0,0 +1,78 @@
+"""
+Adapted from: https://github.com/openai/gym/blob/907b1b20dd9ac0cba5803225059b9c6673702467/gym/wrappers/time_aware_observation.py
+License: MIT
+Copyright (c) 2016 OpenAI (https://openai.com)
+
+Wrapper for adding time aware observations to environment observation.
+"""
+import numpy as np
+
+import gym
+from gym.spaces import Box
+
+
+class TimeAwareObservation(gym.ObservationWrapper):
+    """Augment the observation with the current time step in the episode.
+
+    The observation space of the wrapped environment is assumed to be a flat :class:`Box`.
+    In particular, pixel observations are not supported. This wrapper will append the current timestep
+    within the current episode to the observation.
+
+    Example:
+        >>> import gym
+        >>> env = gym.make('CartPole-v1')
+        >>> env = TimeAwareObservation(env)
+        >>> env.reset()
+        array([ 0.03810719,  0.03522411,  0.02231044, -0.01088205,  0.        ])
+        >>> env.step(env.action_space.sample())[0]
+        array([ 0.03881167, -0.16021058,  0.0220928 ,  0.28875574,  1.        ])
+    """
+
+    def __init__(self, env: gym.Env):
+        """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box`
+        observation space.
+
+        Args:
+            env: The environment to apply the wrapper
+        """
+        super().__init__(env)
+        assert isinstance(env.observation_space, Box)
+        low = np.append(self.observation_space.low, 0.0)
+        high = np.append(self.observation_space.high, np.inf)
+        self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
+        self.t = 0
+
+    def observation(self, observation):
+        """Appends the current time step to the observation.
+
+        Args:
+            observation: The observation to add the time step to
+
+        Returns:
+            The observation with the time step appended as its last element
+        """
+        return np.append(observation, self.t)
+
+    def step(self, action):
+        """Steps through the environment, incrementing the time step.
+
+        Args:
+            action: The action to take
+
+        Returns:
+            The parent wrapper's step result for the given action.
+        """
+        self.t += 1
+        return super().step(action)
+
+    def reset(self, **kwargs):
+        """Resets the environment and sets the time step back to zero.
+
+        Args:
+            **kwargs: Kwargs to apply to env.reset()
+
+        Returns:
+            The value returned by the parent wrapper's reset()
+        """
+        self.t = 0
+        return super().reset(**kwargs)
--- a/alr_envs/examples/examples_movement_primitives.py
+++ b/alr_envs/examples/examples_movement_primitives.py
@ -1,13 +1,11 @@
-import numpy as np
-
 import alr_envs


-def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
+def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True):
    """
-    Example for running a motion primitive based environment, which is already registered
+    Example for running a black box based environment, which is already registered
    Args:
-        env_name: DMP env_id
+        env_name: Black box env_id
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps to run
        render: Render the episode
@ -15,8 +13,8 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
    Returns:

    """
-    # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
-    # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
+    # Analogous to gym, we provide a make function that can be used to create environments.
+    # It takes care of seeding and enables the use of a variety of external environments using the gym interface.
    env = alr_envs.make(env_name, seed)

    rewards = 0
@ -37,8 +35,12 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
        else:
            env.render(mode=None)

+        # Now the action space is not the raw action but the parametrization of the trajectory generator,
+        # such as a ProMP
        ac = env.action_space.sample()
+        # This executes a full trajectory
        obs, reward, done, info = env.step(ac)
+        # Aggregated reward
        rewards += reward

        if done:
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@ -7,8 +7,7 @@ from typing import Iterable, Type, Union

 import gym
 import numpy as np
-
-import alr_envs
+from gym.envs.registration import register, registry

 try:
    from dm_control import suite, manipulation, composer
@ -22,20 +21,16 @@ except Exception:
    # catch Exception due to Mujoco-py
    pass

-from gym.envs.registration import registry
-from gym.envs.registration import register
-from gym.wrappers import TimeAwareObservation
-
+import alr_envs
 from alr_envs.black_box.black_box_wrapper import BlackBoxWrapper
 from alr_envs.black_box.factory.basis_generator_factory import get_basis_generator
 from alr_envs.black_box.factory.controller_factory import get_controller
 from alr_envs.black_box.factory.phase_generator_factory import get_phase_generator
 from alr_envs.black_box.factory.trajectory_generator_factory import get_trajectory_generator
 from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from alr_envs.black_box.time_aware_observation import TimeAwareObservation
 from alr_envs.utils.utils import nested_update

-ALL_FRAMEWORK_TYPES = ['meta', 'dmc', 'gym']
-

 def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
    """
@ -63,34 +58,21 @@ def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwa
    return f if return_callable else f()


-def make(env_id, seed, **kwargs):
-    return _make(env_id, seed, **kwargs)
-
-
-def _make(env_id: str, seed, **kwargs):
+def make(env_id: str, seed: int, **kwargs):
    """
    Converts an env_id to an environment with the gym API.
-    This also works for DeepMind Control Suite interface_wrappers
-    for which domain name and task name are expected to be separated by "-".
+    This also works for DeepMind Control Suite environments that are wrapped using the DMCWrapper; they can be
+    specified with "dmc:domain_name-task_name".
+    Analogously, metaworld tasks can be created as "metaworld:env_id-v2".
+
    Args:
-        env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
+        env_id: spec or env_id for gym tasks; external environments require a "dmc:" or "metaworld:" prefix
        **kwargs: Additional kwargs for the constructor such as pixel observations, etc.

    Returns: Gym environment

    """

-    # 'dmc:domain-task'
-    # 'gym:name-vX'
-    # 'meta:name-vX'
-    # 'meta:bb:name-vX'
-    # 'hand:name-vX'
-    # 'name-vX'
-    # 'bb:name-vX'
-    #
-    # env_id.split(':')
-    # if 'dmc' :
-
    if ':' in env_id:
        split_id = env_id.split(':')
        framework, env_id = split_id[-2:]
@ -98,13 +80,17 @@ def _make(env_id: str, seed, **kwargs):
        framework = None

    if framework == 'metaworld':
-        # MetaWorld env
-        env = make_metaworld(env_id, seed=seed, **kwargs)
+        # MetaWorld environment
+        env = make_metaworld(env_id, seed, **kwargs)
    elif framework == 'dmc':
-        # DeepMind Controlp
-        env = make_dmc(env_id, seed=seed, **kwargs)
+        # DeepMind Control environment
+        env = make_dmc(env_id, seed, **kwargs)
    else:
-        env = make_gym(env_id, seed=seed, **kwargs)
+        env = make_gym(env_id, seed, **kwargs)
+
+    env.seed(seed)
+    env.action_space.seed(seed)
+    env.observation_space.seed(seed)

    return env

@ -285,7 +271,7 @@ def make_dmc(
    )

    env = gym.make(gym_id)
-    env.seed(seed=seed)
+    env.seed(seed)
    return env


@ -300,15 +286,6 @@ def make_metaworld(env_id, seed, **kwargs):
    # New argument to use global seeding
    _env.seeded_rand_vec = True

-    # Manually set spec, as metaworld environments are not registered via gym
-    # _env.unwrapped.spec = EnvSpec(env_id)
-    # Set Timelimit based on the maximum allowed path length of the environment
-    # _env = gym.wrappers.TimeLimit(_env, max_episode_steps=_env.max_path_length)
-    # _env.seed(seed)
-    # _env.action_space.seed(seed)
-    # _env.observation_space.seed(seed)
-    # _env.goal_space.seed(seed)
-
    gym_id = uuid.uuid4().hex + '-v1'

    register(
@ -319,28 +296,33 @@ def make_metaworld(env_id, seed, **kwargs):

    # TODO enable checker when the incorrect dtype of obs and observation space are fixed by metaworld
    env = gym.make(gym_id, disable_env_checker=True)
-    env.seed(seed=seed)
    return env


 def make_gym(env_id, seed, **kwargs):
-    # This access is required to allow for nested dict updates for BB envs
-    spec = registry.get(env_id)
-    all_kwargs = deepcopy(spec.kwargs)
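+    """
+    Create a gym-registered environment, applying nested kwargs updates on top of the registered kwargs.
+    Args:
+        env_id: id of an environment in the gym registry
+        seed: seed that is added to the kwargs of black box environments
+        **kwargs: keyword arguments that are merged into the registered kwargs via nested_update
+
+    Returns: the environment created by gym.make
+
+    """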
+    # Fetch the registered kwargs to allow for nested dict updates for BB envs;
+    # gym itself only allows for non-nested updates.
+    all_kwargs = deepcopy(registry.get(env_id).kwargs)
    nested_update(all_kwargs, kwargs)
    kwargs = all_kwargs

-    # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
-    # if env_id.startswith("dmc") or any(s in env_id.lower() for s in ['promp', 'dmp', 'prodmp']):
+    # Add seed to kwargs for bb environments to pass seed to step environments
    all_bb_envs = sum(alr_envs.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.values(), [])
-    if env_id.startswith("dmc") or env_id in all_bb_envs:
+    if env_id in all_bb_envs:
        kwargs.update({"seed": seed})

    # Gym
    env = gym.make(env_id, **kwargs)
-    env.seed(seed)
-    env.action_space.seed(seed)
-    env.observation_space.seed(seed)
    return env