added dmc2gym conversion and example how to leverage DMPs

This commit is contained in:
ottofabian 2021-06-28 17:25:53 +02:00
parent c8742e2934
commit 3b215cd877
17 changed files with 573 additions and 143 deletions

View File

@ -67,7 +67,7 @@ cd alr_envs
```bash ```bash
pip install -e . pip install -e .
``` ```
4. Use (see [example.py](./example.py)): 4. Use (see [example.py](alr_envs/examples/examples_general.py)):
```python ```python
import gym import gym

View File

@ -463,7 +463,7 @@ register(
"weights_scale": 0.2, "weights_scale": 0.2,
"zero_start": True, "zero_start": True,
"zero_goal": True, "zero_goal": True,
"p_gains": np.array([4./3., 2.4, 2.5, 5./3., 2., 2., 1.25]), "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
"d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025]) "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
} }
) )

View File

@ -2,7 +2,7 @@ from typing import Union
import numpy as np import numpy as np
from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class HoleReacherMPWrapper(MPEnvWrapper): class HoleReacherMPWrapper(MPEnvWrapper):

View File

@ -2,7 +2,7 @@ from typing import Union
import numpy as np import numpy as np
from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class SimpleReacherMPWrapper(MPEnvWrapper): class SimpleReacherMPWrapper(MPEnvWrapper):

View File

@ -2,7 +2,7 @@ from typing import Union
import numpy as np import numpy as np
from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class ViaPointReacherMPWrapper(MPEnvWrapper): class ViaPointReacherMPWrapper(MPEnvWrapper):

View File

@ -0,0 +1,27 @@
from typing import Union
import numpy as np
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class BallInCupMPWrapper(MPEnvWrapper):
    """MP wrapper for the ball-in-cup task: observation mask, start position and step size."""

    @property
    def active_obs(self):
        """Boolean mask over the 8 observation entries; only the ball position is active."""
        # Layout: cup position (2), ball position (2), cup velocity (2), ball velocity (2).
        # Besides the ball position, the environment is always set to 0.
        return np.array([False, False, True, True, False, False, False, False])

    @property
    def start_pos(self) -> Union[float, int, np.ndarray]:
        """Current cup joint positions (``cup_x``, ``cup_z``) read from the physics state."""
        qpos = self.physics.named.data.qpos
        return np.hstack([qpos['cup_x'], qpos['cup_z']])

    @property
    def dt(self) -> Union[float, int]:
        """Control step of the underlying task."""
        # Taken from: https://github.com/deepmind/dm_control/blob/master/dm_control/suite/ball_in_cup.py#L27
        return 0.02

0
alr_envs/dmc/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,73 @@
from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import BallInCupMPWrapper
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
def example_dmc(env_name="fish-swim", seed=1):
    """
    Step an environment created via ``make_env`` with random actions.

    Runs 2000 environment steps and prints the accumulated reward whenever an episode ends.

    Args:
        env_name: id passed to ``make_env``
        seed: seed passed to ``make_env``
    """
    env = make_env(env_name, seed)
    episode_return = 0
    env.reset()

    for _ in range(2000):
        action = env.action_space.sample()
        _, reward, done, _ = env.step(action)
        episode_return += reward

        if not done:
            continue

        # Episode finished: report and start over.
        print(episode_return)
        episode_return = 0
        env.reset()
def example_custom_dmc_and_mp(seed=1):
    """
    Build a custom motion primitive (DMP) environment on top of a DMC task and sample from it.

    The registered environments are assembled the same way but cannot be modified directly;
    building the environment by hand makes it easy to tune the motion primitive hyperparameters.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed

    Returns:

    """
    base_env = "ball_in_cup-catch"

    # Swap in your own wrapper (a subclass of MPEnvWrapper) for a different environment.
    # Further gym.Wrappers can be appended to this list if required.
    wrappers = [BallInCupMPWrapper]

    mp_kwargs = dict(
        num_dof=2,  # env.start_pos
        num_basis=5,
        duration=2,
        learn_goal=True,
        alpha_phase=2,
        bandwidth_factor=2,
        policy_type="velocity",
        weights_scale=50,
        goal_scale=0.1,
    )
    env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs)
    # Alternative, deterministic ProMP:
    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs)

    episode_return = 0
    env.reset()

    # Each step is one sample, i.e. one full trajectory (multiple environment steps).
    for _ in range(10):
        params = env.action_space.sample()
        _, reward, done, _ = env.step(params)
        episode_return += reward

        if done:
            print(episode_return)
            episode_return = 0
            env.reset()
if __name__ == "__main__":
    # Plain DMC task first, then the DMP-wrapped DMC task.
    example_dmc()
    example_custom_dmc_and_mp()

View File

@ -0,0 +1,74 @@
import warnings
from collections import defaultdict
import gym
import numpy as np
from alr_envs.utils.make_env_helpers import make_env
from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist
def example_general(env_id='alr_envs:ALRReacher-v0', seed=1):
    """
    Run an arbitrary environment in the step-based setting with random actions.

    Because the environment is created through our custom ``make_env``,
    DMC environments can be used here as well.

    Args:
        env_id: id passed to ``make_env``
        seed: seed passed to ``make_env``
    """
    env = make_env(env_id, seed)
    episode_return = 0
    first_obs = env.reset()
    print("Observation shape: ", first_obs.shape)
    print("Action shape: ", env.action_space.shape)

    # 10000 single environment steps
    for _ in range(10000):
        _, reward, done, _ = env.step(env.action_space.sample())
        episode_return += reward

        # Uncomment to visualize every step:
        # env.render()

        if not done:
            continue

        print(episode_return)
        episode_return = 0
        env.reset()
def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)):
    """
    Sample from several copies of an environment in parallel via ``gym.vector.AsyncVectorEnv``.

    Args:
        env_id: id of the environment to replicate
        n_cpu: number of parallel environments
        seed: base seed; environment ``i`` is created with ``make_env_rank(env_id, seed, i)``
    """
    from alr_envs.utils.make_env_helpers import make_env_rank

    def sample(env: gym.vector.VectorEnv, n_samples=100):
        """Step ``env`` with random actions and return the stacked obs, rewards, dones and infos."""
        # Running return per sub-environment, only used for printing.
        rewards = np.zeros(env.num_envs)

        # this would generate more samples than requested if n_samples % num_envs != 0
        repeat = int(np.ceil(n_samples / env.num_envs))
        vals = defaultdict(list)
        for _ in range(repeat):
            # Use the environment that was passed in, not the enclosing ``envs`` variable.
            obs, reward, done, info = env.step(env.action_space.sample())
            vals['obs'].append(obs)
            vals['reward'].append(reward)
            vals['done'].append(done)
            vals['info'].append(info)
            rewards += reward
            if np.any(done):
                print(rewards[done])
                rewards[done] = 0

        # do not return values above threshold
        # NOTE(review): the slice acts on the first axis of the stacked values, which is the
        # step axis (length ``repeat``), presumably not the flattened sample axis - confirm intent.
        return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)

    envs = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)])
    # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
    try:
        envs.reset()
        print(sample(envs, 16))
    finally:
        # Always shut the vector env down, even if sampling fails.
        envs.close()
if __name__ == "__main__":
    # Alternatives to try:
    #   DMC task:          example_general("fish-swim")
    #   custom mujoco env: example_general("alr_envs:ALRReacher-v0")
    example_general("ball_in_cup-catch")

View File

@ -0,0 +1,103 @@
from alr_envs import HoleReacherMPWrapper
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1):
    """
    Run an already registered motion primitive environment with random parameters.

    Args:
        env_name: DMP env_id
        seed: seed

    Returns:

    """
    # gym.make() would work here too, but our make_env additionally handles seeding
    # and makes DMC tasks available through the gym interface.
    env = make_env(env_name, seed)

    render_every = 1  # render after every n-th trajectory
    episode_return = 0
    # env.render(mode=None)
    env.reset()

    # Each step is one sample, i.e. one full trajectory (multiple environment steps).
    for i in range(10):
        params = env.action_space.sample()
        _, reward, done, _ = env.step(params)
        episode_return += reward

        if i % render_every == 0:
            # Renders the full DMP trajectory.
            # Calling render once at the very beginning also works and renders every trajectory;
            # calling it after each trajectory allows changing the mode (mode=None disables rendering).
            env.render(mode="human")

        if done:
            print(episode_return)
            episode_return = 0
            env.reset()
def example_custom_mp(seed=1):
    """
    Build a custom motion primitive (DMP) environment by hand and sample from it.

    The registered environments are assembled the same way but cannot be modified directly;
    building the environment by hand makes it easy to tune the motion primitive hyperparameters.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed

    Returns:

    """
    base_env = "alr_envs:HoleReacher-v1"

    # Swap in your own wrapper (a subclass of MPEnvWrapper) for a different environment.
    # Further gym.Wrappers can be appended to this list if required.
    wrappers = [HoleReacherMPWrapper]

    mp_kwargs = dict(
        num_dof=5,
        num_basis=5,
        duration=2,
        learn_goal=True,
        alpha_phase=2,
        bandwidth_factor=2,
        policy_type="velocity",
        weights_scale=50,
        goal_scale=0.1,
    )
    env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs)
    # Alternative, deterministic ProMP:
    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed)

    render_every = 1  # render after every n-th trajectory
    episode_return = 0
    # env.render(mode=None)
    env.reset()

    # Each step is one sample, i.e. one full trajectory (multiple environment steps).
    for i in range(10):
        params = env.action_space.sample()
        _, reward, done, _ = env.step(params)
        episode_return += reward

        if i % render_every == 0:
            # Renders the full DMP trajectory.
            # Calling render once at the very beginning also works and renders every trajectory;
            # calling it after each trajectory allows changing the mode (mode=None disables rendering).
            env.render(mode="human")

        if done:
            print(episode_return)
            episode_return = 0
            env.reset()
if __name__ == "__main__":
    # Registered DMP environment
    example_mp("alr_envs:HoleReacherDMP-v1")

    # Registered DetProMP environment
    example_mp("alr_envs:HoleReacherDetPMP-v1")

    # Hand-built DMP environment
    example_custom_mp()

View File

@ -2,7 +2,7 @@ from typing import Union
import numpy as np import numpy as np
from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class BallInACupMPWrapper(MPEnvWrapper): class BallInACupMPWrapper(MPEnvWrapper):

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np import numpy as np
from mp_env_api.envs.positional_env_wrapper import PositionalEnvWrapper from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
class BallInACupPositionalWrapper(PositionalEnvWrapper): class BallInACupPositionalWrapper(PositionalEnvWrapper):

View File

@ -0,0 +1,60 @@
import re
import gym
from gym.envs.registration import register
def make(
        id,
        seed=1,
        visualize_reward=True,
        from_pixels=False,
        height=84,
        width=84,
        camera_id=0,
        frame_skip=1,
        episode_length=1000,
        environment_kwargs=None,
        time_limit=None,
        channels_first=True
):
    """
    Register (if necessary) and create a gym environment for a DeepMind Control task.

    Args:
        id: task id of the form 'domain_name-task_name'
        seed: seed stored as ``task_kwargs['random']``; skipped when None
        visualize_reward: forwarded to the wrapper; must be False when ``from_pixels`` is set
        from_pixels: forwarded to the wrapper
        height: forwarded to the wrapper
        width: forwarded to the wrapper
        camera_id: forwarded to the wrapper
        frame_skip: forwarded to the wrapper; also shortens the registered episode length
        episode_length: episode length in simulation steps, before frame skipping
        environment_kwargs: forwarded to the wrapper
        time_limit: stored as ``task_kwargs['time_limit']``; skipped when None
        channels_first: forwarded to the wrapper

    Returns: the environment returned by ``gym.make`` for the registered id

    Raises:
        AssertionError: if ``id`` is malformed, or ``from_pixels`` and ``visualize_reward`` are both set
    """
    # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
    # License: MIT
    # Copyright (c) 2020 Denis Yarats

    # fullmatch (instead of match) also rejects trailing garbage and additional "-" separators,
    # which would otherwise slip through and break or corrupt the split below.
    assert re.fullmatch(r"\w+-\w+", id), "env_id does not have the following structure: 'domain_name-task_name'"
    domain_name, task_name = id.split("-")

    # NOTE(review): the registry key only contains domain, task and seed. A second call with the
    # same three values but different rendering options reuses the first registration.
    env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'

    if from_pixels:
        assert not visualize_reward, 'cannot use visualize reward when learning from pixels'

    # shorten episode length (ceil division by frame_skip)
    max_episode_steps = (episode_length + frame_skip - 1) // frame_skip

    if env_id not in gym.envs.registry.env_specs:
        task_kwargs = {}
        if seed is not None:
            task_kwargs['random'] = seed
        if time_limit is not None:
            task_kwargs['time_limit'] = time_limit
        register(
            id=env_id,
            entry_point='alr_envs.utils.dmc2gym_wrapper:DMCWrapper',
            kwargs=dict(
                domain_name=domain_name,
                task_name=task_name,
                task_kwargs=task_kwargs,
                environment_kwargs=environment_kwargs,
                visualize_reward=visualize_reward,
                from_pixels=from_pixels,
                height=height,
                width=width,
                camera_id=camera_id,
                frame_skip=frame_skip,
                channels_first=channels_first,
            ),
            max_episode_steps=max_episode_steps,
        )
    return gym.make(env_id)

View File

@ -0,0 +1,182 @@
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py
# License: MIT
# Copyright (c) 2020 Denis Yarats
import matplotlib.pyplot as plt
from gym import core, spaces
from dm_control import suite, manipulation
from dm_env import specs
import numpy as np
def _spec_to_box(spec):
    """
    Convert an iterable of dm_env array specs into a single flat float32 ``spaces.Box``.

    Args:
        spec: iterable of ``specs.Array`` / ``specs.BoundedArray`` with float32 or float64 dtype

    Returns: ``spaces.Box`` whose bounds are the concatenated, flattened bounds of all specs

    Raises:
        AssertionError: if a spec has a non-float dtype
        TypeError: if an element is neither a ``specs.Array`` nor a ``specs.BoundedArray``
    """

    def extract_min_max(s):
        assert s.dtype == np.float64 or s.dtype == np.float32, \
            f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
        dim = int(np.prod(s.shape))
        # BoundedArray has to be tested first as it is a specialization of Array.
        if isinstance(s, specs.BoundedArray):
            # Adding zeros broadcasts scalar bounds to the flattened dimension.
            zeros = np.zeros(dim, dtype=np.float32)
            return s.minimum + zeros, s.maximum + zeros
        if isinstance(s, specs.Array):
            bound = np.inf * np.ones(dim, dtype=np.float32)
            return -bound, bound
        # Fail loudly instead of returning None and breaking the tuple unpacking below.
        raise TypeError(f"Unsupported spec type {type(s)}, expected specs.Array or specs.BoundedArray")

    mins, maxs = [], []
    for s in spec:
        mn, mx = extract_min_max(s)
        mins.append(mn)
        maxs.append(mx)
    low = np.concatenate(mins, axis=0)
    high = np.concatenate(maxs, axis=0)
    assert low.shape == high.shape
    return spaces.Box(low, high, dtype=np.float32)
def _flatten_obs(obs):
obs_pieces = []
for v in obs.values():
flat = np.array([v]) if np.isscalar(v) else v.ravel()
obs_pieces.append(flat)
return np.concatenate(obs_pieces, axis=0)
class DMCWrapper(core.Env):
    """
    gym ``Env`` adapter around a dm_control ``suite`` or ``manipulation`` task.

    Actions are exposed as a normalized [-1, 1] box and rescaled to the task's own action bounds
    before stepping. Observations are either the flattened observation dict or rendered pixels.
    Attributes not found on the wrapper are looked up on the wrapped dm_control environment.
    """

    def __init__(
            self,
            domain_name,
            task_name,
            task_kwargs=None,
            visualize_reward=False,
            from_pixels=False,
            height=84,
            width=84,
            camera_id=0,
            frame_skip=1,
            environment_kwargs=None,
            channels_first=True
    ):
        """
        Args:
            domain_name: dm_control domain; "manipulation" selects ``manipulation.load``
            task_name: task (or manipulation environment) name
            task_kwargs: dict that has to contain the seed under the key 'random'
            visualize_reward: forwarded to ``suite.load``
            from_pixels: return rendered images as observations instead of flattened states
            height: render height
            width: render width
            camera_id: camera used for rendering
            frame_skip: number of simulation steps executed per ``step`` call
            environment_kwargs: forwarded to ``suite.load``
            channels_first: return pixel observations as (3, height, width) instead of (height, width, 3)

        Raises:
            AssertionError: if no seed is given or pixels are requested for a manipulation task
        """
        # A missing dict used to crash with a TypeError on the membership test below.
        task_kwargs = {} if task_kwargs is None else task_kwargs
        assert 'random' in task_kwargs, 'please specify a seed, for deterministic behaviour'
        self._from_pixels = from_pixels
        self._height = height
        self._width = width
        self._camera_id = camera_id
        self._frame_skip = frame_skip
        self._channels_first = channels_first

        # create task
        if domain_name == "manipulation":
            assert not from_pixels, \
                "TODO: Vision interface for manipulation is different to suite and needs to be implemented"
            self._env = manipulation.load(
                environment_name=task_name,
                seed=task_kwargs['random']
            )
        else:
            self._env = suite.load(
                domain_name=domain_name,
                task_name=task_name,
                task_kwargs=task_kwargs,
                visualize_reward=visualize_reward,
                environment_kwargs=environment_kwargs
            )

        # true and normalized action spaces
        self._true_action_space = _spec_to_box([self._env.action_spec()])
        self._norm_action_space = spaces.Box(
            low=-1.0,
            high=1.0,
            shape=self._true_action_space.shape,
            dtype=np.float32
        )

        # create observation space
        if from_pixels:
            shape = [3, height, width] if channels_first else [height, width, 3]
            self._observation_space = spaces.Box(
                low=0, high=255, shape=shape, dtype=np.uint8
            )
        else:
            self._observation_space = _spec_to_box(
                self._env.observation_spec().values()
            )

        # The state space always describes the flattened state, also when observing pixels.
        self._state_space = _spec_to_box(
            self._env.observation_spec().values()
        )

        self.current_state = None

        # set seed
        self.seed(seed=task_kwargs['random'])

    def __getattr__(self, name):
        """Forward lookups of unknown attributes to the wrapped dm_control environment."""
        # Only called when normal lookup fails. If '_env' itself is missing (e.g. while the
        # object is being copied/unpickled), forwarding would recurse forever.
        if name == '_env':
            raise AttributeError(name)
        return getattr(self._env, name)

    def _get_obs(self, time_step):
        """Return the observation for ``time_step``: rendered pixels or the flattened state."""
        if self._from_pixels:
            obs = self.render(
                mode="rgb_array",
                height=self._height,
                width=self._width,
                camera_id=self._camera_id
            )
            if self._channels_first:
                # (H, W, C) -> (C, H, W)
                obs = obs.transpose(2, 0, 1).copy()
        else:
            obs = _flatten_obs(time_step.observation)
        return obs

    def _convert_action(self, action):
        """Linearly map an action from the normalized space to the true action bounds."""
        action = action.astype(float)
        true_delta = self._true_action_space.high - self._true_action_space.low
        norm_delta = self._norm_action_space.high - self._norm_action_space.low
        # Scale to [0, 1] first, then stretch to the true range.
        action = (action - self._norm_action_space.low) / norm_delta
        action = action * true_delta + self._true_action_space.low
        action = action.astype(np.float32)
        return action

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def state_space(self):
        return self._state_space

    @property
    def action_space(self):
        # The normalized space is the public one; conversion happens in step().
        return self._norm_action_space

    def seed(self, seed=None):
        """Seed the action and observation spaces (affects ``space.sample()``)."""
        self._true_action_space.seed(seed)
        self._norm_action_space.seed(seed)
        self._observation_space.seed(seed)

    def step(self, action):
        """
        Apply a normalized action for up to ``frame_skip`` simulation steps.

        Returns: tuple (obs, summed reward, done, info); info holds the physics state from
            before the step under 'internal_state' and the last 'discount'.
        """
        assert self._norm_action_space.contains(action)
        action = self._convert_action(action)
        assert self._true_action_space.contains(action)
        reward = 0
        extra = {'internal_state': self._env.physics.get_state().copy()}

        for _ in range(self._frame_skip):
            time_step = self._env.step(action)
            # A missing (None) reward counts as 0.
            reward += time_step.reward or 0
            done = time_step.last()
            if done:
                break
        obs = self._get_obs(time_step)
        self.current_state = _flatten_obs(time_step.observation)
        extra['discount'] = time_step.discount
        return obs, reward, done, extra

    def reset(self):
        """Reset the wrapped environment and return the initial observation."""
        time_step = self._env.reset()
        self.current_state = _flatten_obs(time_step.observation)
        obs = self._get_obs(time_step)
        return obs

    def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
        """Render the physics state; falsy arguments fall back to the values given at construction."""
        assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
        height = height or self._height
        width = width or self._width
        # NOTE(review): an explicit camera_id=0 cannot override a non-zero default camera here.
        camera_id = camera_id or self._camera_id
        return self._env.physics.render(height=height, width=width, camera_id=camera_id)

View File

@ -1,20 +1,22 @@
import logging
from typing import Iterable, List, Type from typing import Iterable, List, Type
import gym import gym
from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
def make_env(env_id: str, seed: int, rank: int = 0): def make_env_rank(env_id: str, seed: int, rank: int = 0):
""" """
Create a new gym environment with given seed. TODO: Do we need this?
Generate a callable to create a new gym environment with a given seed.
The rank is added to the seed and can be used for example when using vector environments. The rank is added to the seed and can be used for example when using vector environments.
E.g. [make_env("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
with seeds 123 through 130. with seeds 123 through 130.
Hence, testing environments should be seeded with a value which is offset by the number of training environments. Hence, testing environments should be seeded with a value which is offset by the number of training environments.
Here e.g. [make_env("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns
Args: Args:
env_id: name of the environment env_id: name of the environment
@ -24,18 +26,34 @@ def make_env(env_id: str, seed: int, rank: int = 0):
Returns: Returns:
""" """
env = gym.make(env_id) return lambda: make_env(env_id, seed + rank)
env.seed(seed + rank)
return lambda: env
def make_contextual_env(env_id, context, seed, rank): def make_env(env_id: str, seed, **kwargs):
env = gym.make(env_id, context=context) """
env.seed(seed + rank) Converts an env_id to an environment with the gym API.
return lambda: env This also works for DeepMind Control Suite env_wrappers
for which domain name and task name are expected to be separated by "-".
Args:
env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
**kwargs: Additional kwargs for the constructor such as pixel observations, etc.
Returns: Gym environment
"""
try:
# Gym
env = gym.make(env_id, **kwargs)
env.seed(seed)
except gym.error.Error:
# DMC
from alr_envs.utils import make
env = make(env_id, seed=seed, **kwargs)
return env
def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]]): def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
""" """
Helper function for creating a wrapped gym environment using MPs. Helper function for creating a wrapped gym environment using MPs.
It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
@ -44,36 +62,40 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]]):
Args: Args:
env_id: name of the environment env_id: name of the environment
wrappers: list of wrappers (at least an MPEnvWrapper), wrappers: list of wrappers (at least an MPEnvWrapper),
seed: seed of environment
Returns: gym environment with all specified wrappers applied Returns: gym environment with all specified wrappers applied
""" """
_env = gym.make(env_id) # _env = gym.make(env_id)
_env = make_env(env_id, seed, **kwargs)
assert any(issubclass(w, MPEnvWrapper) for w in wrappers) assert any(issubclass(w, MPEnvWrapper) for w in wrappers),\
"At least an MPEnvWrapper is required in order to leverage motion primitive environments."
for w in wrappers: for w in wrappers:
_env = w(_env) _env = w(_env)
return _env return _env
def make_dmp_env(env_id: str, wrappers: Iterable, **mp_kwargs): def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, **mp_kwargs):
""" """
This can also be used standalone for manually building a custom DMP environment. This can also be used standalone for manually building a custom DMP environment.
Args: Args:
env_id: base_env_name, env_id: base_env_name,
wrappers: list of wrappers (at least an MPEnvWrapper), wrappers: list of wrappers (at least an MPEnvWrapper),
seed: seed of environment
mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
Returns: DMP wrapped gym env Returns: DMP wrapped gym env
""" """
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers) _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed)
return DmpWrapper(_env, **mp_kwargs) return DmpWrapper(_env, **mp_kwargs)
def make_detpmp_env(env_id: str, wrappers: Iterable, **mp_kwargs): def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, **mp_kwargs):
""" """
This can also be used standalone for manually building a custom Det ProMP environment. This can also be used standalone for manually building a custom Det ProMP environment.
Args: Args:
@ -85,7 +107,7 @@ def make_detpmp_env(env_id: str, wrappers: Iterable, **mp_kwargs):
""" """
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers) _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed)
return DetPMPWrapper(_env, **mp_kwargs) return DetPMPWrapper(_env, **mp_kwargs)
@ -122,3 +144,9 @@ def make_detpmp_env_helper(**kwargs):
""" """
return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), **kwargs.get("mp_kwargs")) return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), **kwargs.get("mp_kwargs"))
def make_contextual_env(env_id, context, seed, rank):
env = gym.make(env_id, context=context)
env.seed(seed + rank)
return lambda: env

View File

@ -1,117 +0,0 @@
from collections import defaultdict
import gym
import numpy as np
from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist
def example_mujoco():
    """Step the ALRReacher environment with random actions and print each episode's return."""
    env = gym.make('alr_envs:ALRReacher-v0')
    episode_return = 0
    env.reset()

    # 10000 single environment steps
    for _ in range(10000):
        _, reward, done, _ = env.step(env.action_space.sample())
        episode_return += reward

        # Uncomment to visualize every step:
        # env.render()

        if not done:
            continue

        print(episode_return)
        episode_return = 0
        env.reset()
def example_mp(env_name="alr_envs:HoleReacherDMP-v1"):
    """Sample random parameters for a registered motion primitive environment and render them."""
    env = gym.make(env_name)
    render_every = 1  # render after every n-th trajectory
    episode_return = 0
    # env.render(mode=None)
    env.reset()

    # Each step is one sample, i.e. one full trajectory (multiple environment steps).
    for i in range(10):
        _, reward, done, _ = env.step(env.action_space.sample())
        episode_return += reward

        if i % render_every == 0:
            # Renders the full DMP trajectory.
            # Calling render once at the very beginning also works and renders every trajectory;
            # calling it after each trajectory allows changing the mode (mode=None disables rendering).
            env.render(mode="human")

        if done:
            print(episode_return)
            episode_return = 0
            env.reset()
def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)):
    """
    Sample from several copies of an environment in parallel via ``gym.vector.AsyncVectorEnv``.

    Args:
        env_id: id of the environment to replicate
        n_cpu: number of parallel environments
        seed: base seed; environment ``i`` is seeded with ``seed + i``
    """

    def make_env(env_id, seed, rank):
        """Return a factory that builds and seeds the environment when it is called."""

        def _init():
            # Build the env inside the factory instead of capturing an already created instance.
            env = gym.make(env_id)
            env.seed(seed + rank)
            return env

        return _init

    def sample(env: gym.vector.VectorEnv, n_samples=100):
        """Step ``env`` with random actions and return the stacked obs, rewards, dones and infos."""
        # Running return per sub-environment, only used for printing.
        rewards = np.zeros(env.num_envs)

        # this would generate more samples than requested if n_samples % num_envs != 0
        repeat = int(np.ceil(n_samples / env.num_envs))
        vals = defaultdict(list)
        for _ in range(repeat):
            # Use the environment that was passed in, not the enclosing ``envs`` variable.
            obs, reward, done, info = env.step(env.action_space.sample())
            vals['obs'].append(obs)
            vals['reward'].append(reward)
            vals['done'].append(done)
            vals['info'].append(info)
            rewards += reward
            if np.any(done):
                print(rewards[done])
                rewards[done] = 0

        # do not return values above threshold
        return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)

    envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed, i) for i in range(n_cpu)])
    try:
        envs.reset()
        print(sample(envs, 16))
    finally:
        # Always shut the vector env down, even if sampling fails.
        envs.close()
def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4):
    """Evaluate 10 random parameter vectors with ``AlrMpEnvSampler`` and print the rewards."""
    n_samples = 10

    sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
    param_dim = sampler.env.action_space.spaces[0].shape[0]

    # Stand-in for samples that would usually come from a search distribution.
    thetas = np.random.randn(n_samples, param_dim)

    results = sampler(thetas)
    _, rewards, _, _ = results
    print(rewards)
def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4):
    """Draw 10 samples with ``AlrContextualMpEnvSampler`` from a dummy distribution and print the rewards."""
    n_samples = 10

    sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu)
    param_dim = sampler.env.action_space.spaces[0].shape[0]
    dist = DummyDist(param_dim)  # needs a sample function

    results = sampler(dist, n_samples)
    new_samples, new_contexts, obs, new_rewards, done, infos = results
    print(new_rewards)
if __name__ == "__main__":
    example_mp("alr_envs:HoleReacherDetPMP-v0")

    # Other examples to try:
    # example_mujoco()
    # example_mp("alr_envs:SimpleReacherDMP-v1")
    # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
    # example_async_contextual_sampler()
    # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
    # env_name = "alr_envs:ALRBallInACupPDSimpleDetPMP-v0"
    # example_async_sampler(env_name)
    # example_mp(env_name)