From 3b215cd87751d1dec33c6aa51a32954c60ad2856 Mon Sep 17 00:00:00 2001 From: ottofabian Date: Mon, 28 Jun 2021 17:25:53 +0200 Subject: [PATCH] added dmc2gym conversion and example how to leverage DMPs --- README.md | 2 +- alr_envs/__init__.py | 2 +- .../hole_reacher/hole_reacher_mp_wrapper.py | 2 +- .../simple_reacher_mp_wrapper.py | 2 +- .../viapoint_reacher_mp_wrapper.py | 2 +- alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py | 27 +++ alr_envs/dmc/__init__.py | 0 alr_envs/examples/__init__.py | 0 alr_envs/examples/examples_dmc.py | 73 +++++++ alr_envs/examples/examples_general.py | 74 +++++++ .../examples/examples_motion_primitives.py | 103 ++++++++++ .../ball_in_a_cup/ball_in_a_cup_mp_wrapper.py | 2 +- .../ball_in_a_cup_positional_wrapper.py | 2 +- alr_envs/utils/__init__.py | 60 ++++++ alr_envs/utils/dmc2gym_wrapper.py | 182 ++++++++++++++++++ alr_envs/utils/make_env_helpers.py | 66 +++++-- example.py | 117 ----------- 17 files changed, 573 insertions(+), 143 deletions(-) create mode 100644 alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py create mode 100644 alr_envs/dmc/__init__.py create mode 100644 alr_envs/examples/__init__.py create mode 100644 alr_envs/examples/examples_dmc.py create mode 100644 alr_envs/examples/examples_general.py create mode 100644 alr_envs/examples/examples_motion_primitives.py create mode 100644 alr_envs/utils/dmc2gym_wrapper.py delete mode 100644 example.py diff --git a/README.md b/README.md index 41d87f2..ce95b8d 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ cd alr_envs ```bash pip install -e . ``` -4. Use (see [example.py](./example.py)): +4. Use (see [example.py](alr_envs/examples/examples_general.py)): ```python import gym diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index de0df8a..90c60e4 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -463,7 +463,7 @@ register( "weights_scale": 0.2, "zero_start": True, "zero_goal": True, - "p_gains": np.array([4./3., 2.4, 2.5, 5./3., 2., 2., 1.25]), + "p_gains": np.array([4. / 3., 2.4, 2.5, 5. 
/ 3., 2., 2., 1.25]), "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025]) } ) diff --git a/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py b/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py index d39edcd..a2fd2a7 100644 --- a/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py +++ b/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py @@ -2,7 +2,7 @@ from typing import Union import numpy as np -from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class HoleReacherMPWrapper(MPEnvWrapper): diff --git a/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py b/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py index 71f6043..ee96fc6 100644 --- a/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py +++ b/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py @@ -2,7 +2,7 @@ from typing import Union import numpy as np -from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class SimpleReacherMPWrapper(MPEnvWrapper): diff --git a/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py b/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py index a3ec7f0..9764d4d 100644 --- a/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py +++ b/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py @@ -2,7 +2,7 @@ from typing import Union import numpy as np -from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class ViaPointReacherMPWrapper(MPEnvWrapper): diff --git a/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py b/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py new file mode 100644 index 0000000..5be3baf --- /dev/null +++ b/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py @@ -0,0 +1,27 @@ +from typing import Union + +import numpy as np + +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper + + +class BallInCupMPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # Besides the ball position, the environment is always set to 0. 
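+        # I.e. `active_obs` is a boolean mask over the flattened observation: only the ball position
+        # is kept as context here, since the remaining entries (cup position and velocities) are
+        # presumably constant at the start of an episode.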
+ return np.hstack([ + [False] * 2, # cup position + [True] * 2, # ball position + [False] * 2, # cup velocity + [False] * 2, # ball velocity + ]) + + @property + def start_pos(self) -> Union[float, int, np.ndarray]: + return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']]) + + @property + def dt(self) -> Union[float, int]: + # Taken from: https://github.com/deepmind/dm_control/blob/master/dm_control/suite/ball_in_cup.py#L27 + return 0.02 diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/examples/__init__.py b/alr_envs/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py new file mode 100644 index 0000000..a55d467 --- /dev/null +++ b/alr_envs/examples/examples_dmc.py @@ -0,0 +1,73 @@ +from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import BallInCupMPWrapper +from alr_envs.utils.make_env_helpers import make_dmp_env, make_env + + +def example_dmc(env_name="fish-swim", seed=1): + env = make_env(env_name, seed) + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(2000): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_custom_dmc_and_mp(seed=1): + """ + Example for running a custom motion primitive based environments based off of a dmc task. + Our already registered environments follow the same structure, but do not directly allow for modifications. + Hence, this also allows to adjust hyperparameters of the motion primitives more easily. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/alr_envs/ + Args: + seed: seed + + Returns: + + """ + + base_env = "ball_in_cup-catch" + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. + # You can also add other gym.Wrappers in case they are needed. 
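+    # As implemented in `_make_wrapped_env`, the wrappers are applied in the order they are listed,
+    # i.e. the first entry wraps the base environment and later entries wrap the result; with a
+    # hypothetical `SomeObsWrapper` this would give BallInCupMPWrapper(SomeObsWrapper(base_env)) for
+    # wrappers = [SomeObsWrapper, BallInCupMPWrapper].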
+ # wrappers = [HoleReacherMPWrapper] + wrappers = [BallInCupMPWrapper] + mp_kwargs = { + "num_dof": 2, # env.start_pos + "num_basis": 5, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs) + # OR for a deterministic ProMP: + # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, **mp_args) + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(10): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +if __name__ == '__main__': + example_dmc() + example_custom_dmc_and_mp() diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py new file mode 100644 index 0000000..041c281 --- /dev/null +++ b/alr_envs/examples/examples_general.py @@ -0,0 +1,74 @@ +import warnings +from collections import defaultdict + +import gym +import numpy as np + +from alr_envs.utils.make_env_helpers import make_env +from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist + + +def example_general(env_id='alr_envs:ALRReacher-v0', seed=1): + """ + Example for running any env in the step based setting. + This also includes DMC environments when leveraging our custom make_env function. + """ + + env = make_env(env_id, seed) + rewards = 0 + obs = env.reset() + print("Observation shape: ", obs.shape) + print("Action shape: ", env.action_space.shape) + + # number of environment steps + for i in range(10000): + obs, reward, done, info = env.step(env.action_space.sample()) + rewards += reward + + # if i % 1 == 0: + # env.render() + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)): + def sample(env: gym.vector.VectorEnv, n_samples=100): + # for plotting + rewards = np.zeros(n_cpu) + + # this would generate more samples than requested if n_samples % num_envs != 0 + repeat = int(np.ceil(n_samples / env.num_envs)) + vals = defaultdict(list) + for i in range(repeat): + obs, reward, done, info = envs.step(envs.action_space.sample()) + vals['obs'].append(obs) + vals['reward'].append(reward) + vals['done'].append(done) + vals['info'].append(info) + rewards += reward + if np.any(done): + print(rewards[done]) + rewards[done] = 0 + + # do not return values above threshold + return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),) + + from alr_envs.utils.make_env_helpers import make_env_rank + envs = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)]) + # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)]) + + obs = envs.reset() + print(sample(envs, 16)) + + +if __name__ == '__main__': + # DMC + # example_general("fish-swim") + + # custom mujoco env + # example_general("alr_envs:ALRReacher-v0") + + example_general("ball_in_cup-catch") diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py new file mode 100644 index 0000000..d3755d7 --- /dev/null +++ b/alr_envs/examples/examples_motion_primitives.py @@ -0,0 +1,103 @@ +from alr_envs import HoleReacherMPWrapper +from alr_envs.utils.make_env_helpers import make_dmp_env, make_env + + +def 
example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1): + """ + Example for running a motion primitive based environment, which is already registered + Args: + env_name: DMP env_id + seed: seed + + Returns: + + """ + # While in this case gym.make() is possible to use as well, we recommend our custom make env function. + # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface. + env = make_env(env_name, seed) + rewards = 0 + # env.render(mode=None) + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(10): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if i % 1 == 0: + # render full DMP trajectory + # render can only be called once in the beginning as well. That would render every trajectory + # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering. + env.render(mode="human") + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_custom_mp(seed=1): + """ + Example for running a custom motion primitive based environments. + Our already registered environments follow the same structure, but do not directly allow for modifications. + Hence, this also allows to adjust hyperparameters of the motion primitives more easily. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/alr_envs/ + Args: + seed: seed + + Returns: + + """ + + base_env = "alr_envs:HoleReacher-v1" + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. + # You can also add other gym.Wrappers in case they are needed. + wrappers = [HoleReacherMPWrapper] + mp_kwargs = { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs) + # OR for a deterministic ProMP: + # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed) + + rewards = 0 + # env.render(mode=None) + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(10): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if i % 1 == 0: + # render full DMP trajectory + # render can only be called once in the beginning as well. That would render every trajectory + # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering. 
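+            # Note: with motion primitives a single env.step() call executes one complete trajectory
+            # in the underlying step-based environment, so `i` here counts sampled trajectories
+            # rather than simulation steps.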
+ env.render(mode="human") + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +if __name__ == '__main__': + # DMP + example_mp("alr_envs:HoleReacherDMP-v1") + + # DetProMP + example_mp("alr_envs:HoleReacherDetPMP-v1") + + # Custom DMP + example_custom_mp() diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py index 0c6c1ce..9393421 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py @@ -2,7 +2,7 @@ from typing import Union import numpy as np -from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper class BallInACupMPWrapper(MPEnvWrapper): diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py index c1f7e07..d1ba696 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py @@ -2,7 +2,7 @@ from typing import Tuple, Union import numpy as np -from mp_env_api.envs.positional_env_wrapper import PositionalEnvWrapper +from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper class BallInACupPositionalWrapper(PositionalEnvWrapper): diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py index e69de29..5b60ba3 100644 --- a/alr_envs/utils/__init__.py +++ b/alr_envs/utils/__init__.py @@ -0,0 +1,60 @@ +import re + +import gym +from gym.envs.registration import register + + +def make( + id, + seed=1, + visualize_reward=True, + from_pixels=False, + height=84, + width=84, + camera_id=0, + frame_skip=1, + episode_length=1000, + environment_kwargs=None, + time_limit=None, + channels_first=True +): + # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py + # License: MIT + # Copyright (c) 2020 Denis Yarats + + assert re.match(r"\w+-\w+", id), "env_id does not have the following structure: 'domain_name-task_name'" + domain_name, task_name = id.split("-") + + env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' + + if from_pixels: + assert not visualize_reward, 'cannot use visualize reward when learning from pixels' + + # shorten episode length + max_episode_steps = (episode_length + frame_skip - 1) // frame_skip + + if env_id not in gym.envs.registry.env_specs: + task_kwargs = {} + if seed is not None: + task_kwargs['random'] = seed + if time_limit is not None: + task_kwargs['time_limit'] = time_limit + register( + id=env_id, + entry_point='alr_envs.utils.dmc2gym_wrapper:DMCWrapper', + kwargs=dict( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + environment_kwargs=environment_kwargs, + visualize_reward=visualize_reward, + from_pixels=from_pixels, + height=height, + width=width, + camera_id=camera_id, + frame_skip=frame_skip, + channels_first=channels_first, + ), + max_episode_steps=max_episode_steps, + ) + return gym.make(env_id) diff --git a/alr_envs/utils/dmc2gym_wrapper.py b/alr_envs/utils/dmc2gym_wrapper.py new file mode 100644 index 0000000..4ae1ec0 --- /dev/null +++ b/alr_envs/utils/dmc2gym_wrapper.py @@ -0,0 +1,182 @@ +# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py +# License: MIT +# Copyright (c) 2020 Denis Yarats +import matplotlib.pyplot as plt +from gym import core, spaces +from dm_control import suite, manipulation +from dm_env import specs 
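+
+# This wrapper exposes dm_control suite and manipulation tasks through the gym API: observations are
+# either flattened state vectors or rendered pixels, actions are rescaled from a normalized [-1, 1]
+# box to the true action bounds, and `frame_skip` repeats each action for several control steps.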
+import numpy as np + + +def _spec_to_box(spec): + def extract_min_max(s): + assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found" + dim = int(np.prod(s.shape)) + if type(s) == specs.Array: + bound = np.inf * np.ones(dim, dtype=np.float32) + return -bound, bound + elif type(s) == specs.BoundedArray: + zeros = np.zeros(dim, dtype=np.float32) + return s.minimum + zeros, s.maximum + zeros + + mins, maxs = [], [] + for s in spec: + mn, mx = extract_min_max(s) + mins.append(mn) + maxs.append(mx) + low = np.concatenate(mins, axis=0) + high = np.concatenate(maxs, axis=0) + assert low.shape == high.shape + return spaces.Box(low, high, dtype=np.float32) + + +def _flatten_obs(obs): + obs_pieces = [] + for v in obs.values(): + flat = np.array([v]) if np.isscalar(v) else v.ravel() + obs_pieces.append(flat) + return np.concatenate(obs_pieces, axis=0) + + +class DMCWrapper(core.Env): + def __init__( + self, + domain_name, + task_name, + task_kwargs=None, + visualize_reward={}, + from_pixels=False, + height=84, + width=84, + camera_id=0, + frame_skip=1, + environment_kwargs=None, + channels_first=True + ): + assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour' + self._from_pixels = from_pixels + self._height = height + self._width = width + self._camera_id = camera_id + self._frame_skip = frame_skip + self._channels_first = channels_first + + # create task + if domain_name == "manipulation": + assert not from_pixels, \ + "TODO: Vision interface for manipulation is different to suite and needs to be implemented" + self._env = manipulation.load( + environment_name=task_name, + seed=task_kwargs['random'] + ) + else: + self._env = suite.load( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + visualize_reward=visualize_reward, + environment_kwargs=environment_kwargs + ) + + # true and normalized action spaces + self._true_action_space = _spec_to_box([self._env.action_spec()]) + self._norm_action_space = spaces.Box( + low=-1.0, + high=1.0, + shape=self._true_action_space.shape, + dtype=np.float32 + ) + + # create observation space + if from_pixels: + shape = [3, height, width] if channels_first else [height, width, 3] + self._observation_space = spaces.Box( + low=0, high=255, shape=shape, dtype=np.uint8 + ) + else: + self._observation_space = _spec_to_box( + self._env.observation_spec().values() + ) + + self._state_space = _spec_to_box( + self._env.observation_spec().values() + ) + + self.current_state = None + + # set seed + self.seed(seed=task_kwargs.get('random', 1)) + + def __getattr__(self, name): + return getattr(self._env, name) + + def _get_obs(self, time_step): + if self._from_pixels: + obs = self.render( + mode="rgb_array", + height=self._height, + width=self._width, + camera_id=self._camera_id + ) + if self._channels_first: + obs = obs.transpose(2, 0, 1).copy() + else: + obs = _flatten_obs(time_step.observation) + return obs + + def _convert_action(self, action): + action = action.astype(float) + true_delta = self._true_action_space.high - self._true_action_space.low + norm_delta = self._norm_action_space.high - self._norm_action_space.low + action = (action - self._norm_action_space.low) / norm_delta + action = action * true_delta + self._true_action_space.low + action = action.astype(np.float32) + return action + + @property + def observation_space(self): + return self._observation_space + + @property + def state_space(self): + return self._state_space + + 
@property + def action_space(self): + return self._norm_action_space + + def seed(self, seed): + self._true_action_space.seed(seed) + self._norm_action_space.seed(seed) + self._observation_space.seed(seed) + + def step(self, action): + assert self._norm_action_space.contains(action) + action = self._convert_action(action) + assert self._true_action_space.contains(action) + reward = 0 + extra = {'internal_state': self._env.physics.get_state().copy()} + + for _ in range(self._frame_skip): + time_step = self._env.step(action) + reward += time_step.reward or 0 + done = time_step.last() + if done: + break + obs = self._get_obs(time_step) + self.current_state = _flatten_obs(time_step.observation) + extra['discount'] = time_step.discount + return obs, reward, done, extra + + def reset(self): + time_step = self._env.reset() + self.current_state = _flatten_obs(time_step.observation) + obs = self._get_obs(time_step) + return obs + + def render(self, mode='rgb_array', height=None, width=None, camera_id=0): + assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode + height = height or self._height + width = width or self._width + camera_id = camera_id or self._camera_id + return self._env.physics.render(height=height, width=width, camera_id=camera_id) \ No newline at end of file diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index 246cd7a..2a3ccf7 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -1,20 +1,22 @@ +import logging from typing import Iterable, List, Type import gym -from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper +from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper -def make_env(env_id: str, seed: int, rank: int = 0): +def make_env_rank(env_id: str, seed: int, rank: int = 0): """ - Create a new gym environment with given seed. + TODO: Do we need this? + Generate a callable to create a new gym environment with a given seed. The rank is added to the seed and can be used for example when using vector environments. - E.g. [make_env("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments + E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments with seeds 123 through 130. Hence, testing environments should be seeded with a value which is offset by the number of training environments. - Here e.g. [make_env("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns + Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns Args: env_id: name of the environment @@ -24,18 +26,34 @@ def make_env(env_id: str, seed: int, rank: int = 0): Returns: """ - env = gym.make(env_id) - env.seed(seed + rank) - return lambda: env + return lambda: make_env(env_id, seed + rank) -def make_contextual_env(env_id, context, seed, rank): - env = gym.make(env_id, context=context) - env.seed(seed + rank) - return lambda: env +def make_env(env_id: str, seed, **kwargs): + """ + Converts an env_id to an environment with the gym API. + This also works for DeepMind Control Suite env_wrappers + for which domain name and task name are expected to be separated by "-". + Args: + env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks + **kwargs: Additional kwargs for the constructor such as pixel observations, etc. 
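+
+    Example (sketch, using ids that appear in this repo's examples):
+        env = make_env("ball_in_cup-catch", seed=1)        # DMC task of the form "domain_name-task_name"
+        env = make_env("alr_envs:ALRReacher-v0", seed=1)   # regular gym env_id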
+ + Returns: Gym environment + + """ + try: + # Gym + env = gym.make(env_id, **kwargs) + env.seed(seed) + except gym.error.Error: + # DMC + from alr_envs.utils import make + env = make(env_id, seed=seed, **kwargs) + + return env -def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]]): +def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs): """ Helper function for creating a wrapped gym environment using MPs. It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is @@ -44,36 +62,40 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]]): Args: env_id: name of the environment wrappers: list of wrappers (at least an MPEnvWrapper), + seed: seed of environment Returns: gym environment with all specified wrappers applied """ - _env = gym.make(env_id) + # _env = gym.make(env_id) + _env = make_env(env_id, seed, **kwargs) - assert any(issubclass(w, MPEnvWrapper) for w in wrappers) + assert any(issubclass(w, MPEnvWrapper) for w in wrappers),\ + "At least an MPEnvWrapper is required in order to leverage motion primitive environments." for w in wrappers: _env = w(_env) return _env -def make_dmp_env(env_id: str, wrappers: Iterable, **mp_kwargs): +def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, **mp_kwargs): """ This can also be used standalone for manually building a custom DMP environment. Args: env_id: base_env_name, wrappers: list of wrappers (at least an MPEnvWrapper), + seed: seed of environment mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP Returns: DMP wrapped gym env """ - _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers) + _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed) return DmpWrapper(_env, **mp_kwargs) -def make_detpmp_env(env_id: str, wrappers: Iterable, **mp_kwargs): +def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, **mp_kwargs): """ This can also be used standalone for manually building a custom Det ProMP environment. 
Args: @@ -85,7 +107,7 @@ def make_detpmp_env(env_id: str, wrappers: Iterable, **mp_kwargs): """ - _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers) + _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed) return DetPMPWrapper(_env, **mp_kwargs) @@ -122,3 +144,9 @@ def make_detpmp_env_helper(**kwargs): """ return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), **kwargs.get("mp_kwargs")) + + +def make_contextual_env(env_id, context, seed, rank): + env = gym.make(env_id, context=context) + env.seed(seed + rank) + return lambda: env diff --git a/example.py b/example.py deleted file mode 100644 index 37a9f81..0000000 --- a/example.py +++ /dev/null @@ -1,117 +0,0 @@ -from collections import defaultdict - -import gym -import numpy as np - -from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist - - -def example_mujoco(): - env = gym.make('alr_envs:ALRReacher-v0') - rewards = 0 - obs = env.reset() - - # number of environment steps - for i in range(10000): - obs, reward, done, info = env.step(env.action_space.sample()) - rewards += reward - - # if i % 1 == 0: - # env.render() - - if done: - print(rewards) - rewards = 0 - obs = env.reset() - - -def example_mp(env_name="alr_envs:HoleReacherDMP-v1"): - env = gym.make(env_name) - rewards = 0 - # env.render(mode=None) - obs = env.reset() - - # number of samples/full trajectories (multiple environment steps) - for i in range(10): - obs, reward, done, info = env.step(env.action_space.sample()) - rewards += reward - - if i % 1 == 0: - # render full DMP trajectory - # render can only be called once in the beginning as well. That would render every trajectory - # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering. 
- env.render(mode="human") - - if done: - print(rewards) - rewards = 0 - obs = env.reset() - - -def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)): - def make_env(env_id, seed, rank): - env = gym.make(env_id) - env.seed(seed + rank) - return lambda: env - - def sample(env: gym.vector.VectorEnv, n_samples=100): - # for plotting - rewards = np.zeros(n_cpu) - - # this would generate more samples than requested if n_samples % num_envs != 0 - repeat = int(np.ceil(n_samples / env.num_envs)) - vals = defaultdict(list) - for i in range(repeat): - obs, reward, done, info = envs.step(envs.action_space.sample()) - vals['obs'].append(obs) - vals['reward'].append(reward) - vals['done'].append(done) - vals['info'].append(info) - rewards += reward - if np.any(done): - print(rewards[done]) - rewards[done] = 0 - - # do not return values above threshold - return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),) - - envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed, i) for i in range(n_cpu)]) - - obs = envs.reset() - print(sample(envs, 16)) - - -def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4): - n_samples = 10 - - sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu) - dim = sampler.env.action_space.spaces[0].shape[0] - - thetas = np.random.randn(n_samples, dim) # usually form a search distribution - - _, rewards, __, ___ = sampler(thetas) - - print(rewards) - - -def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4): - sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu) - dim = sampler.env.action_space.spaces[0].shape[0] - dist = DummyDist(dim) # needs a sample function - - n_samples = 10 - new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples) - - print(new_rewards) - - -if __name__ == '__main__': - example_mp("alr_envs:HoleReacherDetPMP-v0") - # example_mujoco() - # example_mp("alr_envs:SimpleReacherDMP-v1") - # example_async("alr_envs:LongSimpleReacherDMP-v0", 4) - # example_async_contextual_sampler() - # env = gym.make("alr_envs:HoleReacherDetPMP-v1") - # env_name = "alr_envs:ALRBallInACupPDSimpleDetPMP-v0" - # example_async_sampler(env_name) - # example_mp(env_name)