added dmc2gym conversion and an example of how to leverage DMPs
parent c8742e2934
commit 3b215cd877
@@ -67,7 +67,7 @@ cd alr_envs
 ```bash
 pip install -e .
 ```
-4. Use (see [example.py](./example.py)):
+4. Use (see [example.py](alr_envs/examples/examples_general.py)):
 ```python
 import gym

@@ -463,7 +463,7 @@ register(
         "weights_scale": 0.2,
         "zero_start": True,
         "zero_goal": True,
-        "p_gains": np.array([4./3., 2.4, 2.5, 5./3., 2., 2., 1.25]),
+        "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
         "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
     }
 )
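For context on the hunk above: `p_gains` and `d_gains` parameterize a PD tracking controller over the seven joints. A minimal sketch of the usual PD law, assuming the controller behind these kwargs follows the standard form (the actual controller lives in mp_env_api, and `pd_torques` is a hypothetical helper):

```python
import numpy as np

p_gains = np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25])
d_gains = np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])

def pd_torques(des_pos, des_vel, pos, vel):
    # Standard PD tracking law; assumed form of the controller configured above.
    return p_gains * (des_pos - pos) + d_gains * (des_vel - vel)

# Example: drive all joints from 0.1 rad back towards zero.
tau = pd_torques(np.zeros(7), np.zeros(7), np.full(7, 0.1), np.zeros(7))
```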
@@ -2,7 +2,7 @@ from typing import Union

 import numpy as np

-from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper


 class HoleReacherMPWrapper(MPEnvWrapper):
@@ -2,7 +2,7 @@ from typing import Union

 import numpy as np

-from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper


 class SimpleReacherMPWrapper(MPEnvWrapper):
@@ -2,7 +2,7 @@ from typing import Union

 import numpy as np

-from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper


 class ViaPointReacherMPWrapper(MPEnvWrapper):
alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py (new file, 27 lines)
@@ -0,0 +1,27 @@
+from typing import Union
+
+import numpy as np
+
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
+
+
+class BallInCupMPWrapper(MPEnvWrapper):
+
+    @property
+    def active_obs(self):
+        # Besides the ball position, the environment is always set to 0.
+        return np.hstack([
+            [False] * 2,  # cup position
+            [True] * 2,  # ball position
+            [False] * 2,  # cup velocity
+            [False] * 2,  # ball velocity
+        ])
+
+    @property
+    def start_pos(self) -> Union[float, int, np.ndarray]:
+        return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']])
+
+    @property
+    def dt(self) -> Union[float, int]:
+        # Taken from: https://github.com/deepmind/dm_control/blob/master/dm_control/suite/ball_in_cup.py#L27
+        return 0.02
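The `active_obs` mask above marks which observation entries stay visible to the motion primitive policy. A minimal sketch of boolean-mask filtering on a flattened observation (the actual masking is handled inside mp_env_api, so plain boolean indexing is an assumption):

```python
import numpy as np

# Flattened ball_in_cup observation: cup xy, ball xy, cup velocity, ball velocity.
obs = np.arange(8.0)
# Same mask as BallInCupMPWrapper.active_obs above.
active_obs = np.hstack([[False] * 2, [True] * 2, [False] * 2, [False] * 2])
print(obs[active_obs])  # only the ball position [2. 3.] remains
```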
alr_envs/dmc/__init__.py (new file, 0 lines)
alr_envs/examples/__init__.py (new file, 0 lines)
alr_envs/examples/examples_dmc.py (new file, 73 lines)
@@ -0,0 +1,73 @@
+from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import BallInCupMPWrapper
+from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
+
+
+def example_dmc(env_name="fish-swim", seed=1):
+    env = make_env(env_name, seed)
+    rewards = 0
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(2000):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+
+def example_custom_dmc_and_mp(seed=1):
+    """
+    Example for running a custom motion primitive based environment based on a DMC task.
+    Our already registered environments follow the same structure, but do not directly allow for modifications.
+    Hence, this also allows adjusting hyperparameters of the motion primitives more easily.
+    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
+    for our repo: https://github.com/ALRhub/alr_envs/
+    Args:
+        seed: seed
+
+    Returns:
+
+    """
+
+    base_env = "ball_in_cup-catch"
+    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+    # You can also add other gym.Wrappers in case they are needed.
+    # wrappers = [HoleReacherMPWrapper]
+    wrappers = [BallInCupMPWrapper]
+    mp_kwargs = {
+        "num_dof": 2,  # env.start_pos
+        "num_basis": 5,
+        "duration": 2,
+        "learn_goal": True,
+        "alpha_phase": 2,
+        "bandwidth_factor": 2,
+        "policy_type": "velocity",
+        "weights_scale": 50,
+        "goal_scale": 0.1
+    }
+    env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs)
+    # OR for a deterministic ProMP:
+    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs)
+
+    rewards = 0
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(10):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+
+if __name__ == '__main__':
+    example_dmc()
+    example_custom_dmc_and_mp()
alr_envs/examples/examples_general.py (new file, 74 lines)
@@ -0,0 +1,74 @@
+import warnings
+from collections import defaultdict
+
+import gym
+import numpy as np
+
+from alr_envs.utils.make_env_helpers import make_env
+from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist
+
+
+def example_general(env_id='alr_envs:ALRReacher-v0', seed=1):
+    """
+    Example for running any env in the step based setting.
+    This also includes DMC environments when leveraging our custom make_env function.
+    """
+
+    env = make_env(env_id, seed)
+    rewards = 0
+    obs = env.reset()
+    print("Observation shape: ", obs.shape)
+    print("Action shape: ", env.action_space.shape)
+
+    # number of environment steps
+    for i in range(10000):
+        obs, reward, done, info = env.step(env.action_space.sample())
+        rewards += reward
+
+        # if i % 1 == 0:
+        #     env.render()
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+
+def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)):
+    def sample(env: gym.vector.VectorEnv, n_samples=100):
+        # for plotting
+        rewards = np.zeros(n_cpu)
+
+        # this would generate more samples than requested if n_samples % num_envs != 0
+        repeat = int(np.ceil(n_samples / env.num_envs))
+        vals = defaultdict(list)
+        for i in range(repeat):
+            obs, reward, done, info = envs.step(envs.action_space.sample())
+            vals['obs'].append(obs)
+            vals['reward'].append(reward)
+            vals['done'].append(done)
+            vals['info'].append(info)
+            rewards += reward
+            if np.any(done):
+                print(rewards[done])
+                rewards[done] = 0
+
+        # do not return values above threshold
+        return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)
+
+    from alr_envs.utils.make_env_helpers import make_env_rank
+    envs = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)])
+    # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
+
+    obs = envs.reset()
+    print(sample(envs, 16))
+
+
+if __name__ == '__main__':
+    # DMC
+    # example_general("fish-swim")
+
+    # custom mujoco env
+    # example_general("alr_envs:ALRReacher-v0")
+
+    example_general("ball_in_cup-catch")
alr_envs/examples/examples_motion_primitives.py (new file, 103 lines)
@@ -0,0 +1,103 @@
+from alr_envs import HoleReacherMPWrapper
+from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
+
+
+def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1):
+    """
+    Example for running a motion primitive based environment, which is already registered.
+    Args:
+        env_name: DMP env_id
+        seed: seed
+
+    Returns:
+
+    """
+    # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
+    # First, it already takes care of seeding and second it enables the use of DMC tasks within the gym interface.
+    env = make_env(env_name, seed)
+    rewards = 0
+    # env.render(mode=None)
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(10):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if i % 1 == 0:
+            # render full DMP trajectory
+            # render can only be called once in the beginning as well. That would render every trajectory.
+            # Calling it after every trajectory allows modifying the mode. mode=None disables rendering.
+            env.render(mode="human")
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+
+def example_custom_mp(seed=1):
+    """
+    Example for running a custom motion primitive based environment.
+    Our already registered environments follow the same structure, but do not directly allow for modifications.
+    Hence, this also allows adjusting hyperparameters of the motion primitives more easily.
+    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
+    for our repo: https://github.com/ALRhub/alr_envs/
+    Args:
+        seed: seed
+
+    Returns:
+
+    """
+
+    base_env = "alr_envs:HoleReacher-v1"
+    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
+    # You can also add other gym.Wrappers in case they are needed.
+    wrappers = [HoleReacherMPWrapper]
+    mp_kwargs = {
+        "num_dof": 5,
+        "num_basis": 5,
+        "duration": 2,
+        "learn_goal": True,
+        "alpha_phase": 2,
+        "bandwidth_factor": 2,
+        "policy_type": "velocity",
+        "weights_scale": 50,
+        "goal_scale": 0.1
+    }
+    env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, **mp_kwargs)
+    # OR for a deterministic ProMP:
+    # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed)
+
+    rewards = 0
+    # env.render(mode=None)
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(10):
+        ac = env.action_space.sample()
+        obs, reward, done, info = env.step(ac)
+        rewards += reward
+
+        if i % 1 == 0:
+            # render full DMP trajectory
+            # render can only be called once in the beginning as well. That would render every trajectory.
+            # Calling it after every trajectory allows modifying the mode. mode=None disables rendering.
+            env.render(mode="human")
+
+        if done:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
+
+
+if __name__ == '__main__':
+    # DMP
+    example_mp("alr_envs:HoleReacherDMP-v1")
+
+    # DetProMP
+    example_mp("alr_envs:HoleReacherDetPMP-v1")
+
+    # Custom DMP
+    example_custom_mp()
@@ -2,7 +2,7 @@ from typing import Union

 import numpy as np

-from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper


 class BallInACupMPWrapper(MPEnvWrapper):
@@ -2,7 +2,7 @@ from typing import Tuple, Union

 import numpy as np

-from mp_env_api.envs.positional_env_wrapper import PositionalEnvWrapper
+from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper


 class BallInACupPositionalWrapper(PositionalEnvWrapper):
alr_envs/utils/__init__.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+import re
+
+import gym
+from gym.envs.registration import register
+
+
+def make(
+        id,
+        seed=1,
+        visualize_reward=True,
+        from_pixels=False,
+        height=84,
+        width=84,
+        camera_id=0,
+        frame_skip=1,
+        episode_length=1000,
+        environment_kwargs=None,
+        time_limit=None,
+        channels_first=True
+):
+    # Adapted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
+    # License: MIT
+    # Copyright (c) 2020 Denis Yarats
+
+    assert re.match(r"\w+-\w+", id), "env_id does not have the following structure: 'domain_name-task_name'"
+    domain_name, task_name = id.split("-")
+
+    env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1'
+
+    if from_pixels:
+        assert not visualize_reward, 'cannot use visualize reward when learning from pixels'
+
+    # shorten episode length
+    max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
+
+    if env_id not in gym.envs.registry.env_specs:
+        task_kwargs = {}
+        if seed is not None:
+            task_kwargs['random'] = seed
+        if time_limit is not None:
+            task_kwargs['time_limit'] = time_limit
+        register(
+            id=env_id,
+            entry_point='alr_envs.utils.dmc2gym_wrapper:DMCWrapper',
+            kwargs=dict(
+                domain_name=domain_name,
+                task_name=task_name,
+                task_kwargs=task_kwargs,
+                environment_kwargs=environment_kwargs,
+                visualize_reward=visualize_reward,
+                from_pixels=from_pixels,
+                height=height,
+                width=width,
+                camera_id=camera_id,
+                frame_skip=frame_skip,
+                channels_first=channels_first,
+            ),
+            max_episode_steps=max_episode_steps,
+        )
+    return gym.make(env_id)
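A short usage sketch for the registration helper above; the "domain_name-task_name" id convention and all parameters come from the signature in this commit, and "ball_in_cup-catch" is the task id used in the examples:

```python
from alr_envs.utils import make  # the import path used by make_env_helpers below

# Registers dmc_ball_in_cup_catch_1-v1 on first call and returns gym.make(env_id);
# the seed is forwarded to the DMC task via task_kwargs['random'].
env = make("ball_in_cup-catch", seed=1, episode_length=1000, frame_skip=1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
```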
alr_envs/utils/dmc2gym_wrapper.py (new file, 182 lines)
@@ -0,0 +1,182 @@
+# Adapted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py
+# License: MIT
+# Copyright (c) 2020 Denis Yarats
+import matplotlib.pyplot as plt
+from gym import core, spaces
+from dm_control import suite, manipulation
+from dm_env import specs
+import numpy as np
+
+
+def _spec_to_box(spec):
+    def extract_min_max(s):
+        assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found"
+        dim = int(np.prod(s.shape))
+        if type(s) == specs.Array:
+            bound = np.inf * np.ones(dim, dtype=np.float32)
+            return -bound, bound
+        elif type(s) == specs.BoundedArray:
+            zeros = np.zeros(dim, dtype=np.float32)
+            return s.minimum + zeros, s.maximum + zeros
+
+    mins, maxs = [], []
+    for s in spec:
+        mn, mx = extract_min_max(s)
+        mins.append(mn)
+        maxs.append(mx)
+    low = np.concatenate(mins, axis=0)
+    high = np.concatenate(maxs, axis=0)
+    assert low.shape == high.shape
+    return spaces.Box(low, high, dtype=np.float32)
+
+
+def _flatten_obs(obs):
+    obs_pieces = []
+    for v in obs.values():
+        flat = np.array([v]) if np.isscalar(v) else v.ravel()
+        obs_pieces.append(flat)
+    return np.concatenate(obs_pieces, axis=0)
+
+
+class DMCWrapper(core.Env):
+    def __init__(
+            self,
+            domain_name,
+            task_name,
+            task_kwargs=None,
+            visualize_reward={},
+            from_pixels=False,
+            height=84,
+            width=84,
+            camera_id=0,
+            frame_skip=1,
+            environment_kwargs=None,
+            channels_first=True
+    ):
+        assert 'random' in task_kwargs, 'please specify a seed for deterministic behaviour'
+        self._from_pixels = from_pixels
+        self._height = height
+        self._width = width
+        self._camera_id = camera_id
+        self._frame_skip = frame_skip
+        self._channels_first = channels_first
+
+        # create task
+        if domain_name == "manipulation":
+            assert not from_pixels, \
+                "TODO: Vision interface for manipulation is different to suite and needs to be implemented"
+            self._env = manipulation.load(
+                environment_name=task_name,
+                seed=task_kwargs['random']
+            )
+        else:
+            self._env = suite.load(
+                domain_name=domain_name,
+                task_name=task_name,
+                task_kwargs=task_kwargs,
+                visualize_reward=visualize_reward,
+                environment_kwargs=environment_kwargs
+            )
+
+        # true and normalized action spaces
+        self._true_action_space = _spec_to_box([self._env.action_spec()])
+        self._norm_action_space = spaces.Box(
+            low=-1.0,
+            high=1.0,
+            shape=self._true_action_space.shape,
+            dtype=np.float32
+        )
+
+        # create observation space
+        if from_pixels:
+            shape = [3, height, width] if channels_first else [height, width, 3]
+            self._observation_space = spaces.Box(
+                low=0, high=255, shape=shape, dtype=np.uint8
+            )
+        else:
+            self._observation_space = _spec_to_box(
+                self._env.observation_spec().values()
+            )
+
+        self._state_space = _spec_to_box(
+            self._env.observation_spec().values()
+        )
+
+        self.current_state = None
+
+        # set seed
+        self.seed(seed=task_kwargs.get('random', 1))
+
+    def __getattr__(self, name):
+        return getattr(self._env, name)
+
+    def _get_obs(self, time_step):
+        if self._from_pixels:
+            obs = self.render(
+                mode="rgb_array",
+                height=self._height,
+                width=self._width,
+                camera_id=self._camera_id
+            )
+            if self._channels_first:
+                obs = obs.transpose(2, 0, 1).copy()
+        else:
+            obs = _flatten_obs(time_step.observation)
+        return obs
+
+    def _convert_action(self, action):
+        action = action.astype(float)
+        true_delta = self._true_action_space.high - self._true_action_space.low
+        norm_delta = self._norm_action_space.high - self._norm_action_space.low
+        action = (action - self._norm_action_space.low) / norm_delta
+        action = action * true_delta + self._true_action_space.low
+        action = action.astype(np.float32)
+        return action
+
+    @property
+    def observation_space(self):
+        return self._observation_space
+
+    @property
+    def state_space(self):
+        return self._state_space
+
+    @property
+    def action_space(self):
+        return self._norm_action_space
+
+    def seed(self, seed):
+        self._true_action_space.seed(seed)
+        self._norm_action_space.seed(seed)
+        self._observation_space.seed(seed)
+
+    def step(self, action):
+        assert self._norm_action_space.contains(action)
+        action = self._convert_action(action)
+        assert self._true_action_space.contains(action)
+        reward = 0
+        extra = {'internal_state': self._env.physics.get_state().copy()}
+
+        for _ in range(self._frame_skip):
+            time_step = self._env.step(action)
+            reward += time_step.reward or 0
+            done = time_step.last()
+            if done:
+                break
+        obs = self._get_obs(time_step)
+        self.current_state = _flatten_obs(time_step.observation)
+        extra['discount'] = time_step.discount
+        return obs, reward, done, extra
+
+    def reset(self):
+        time_step = self._env.reset()
+        self.current_state = _flatten_obs(time_step.observation)
+        obs = self._get_obs(time_step)
+        return obs
+
+    def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
+        assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode
+        height = height or self._height
+        width = width or self._width
+        camera_id = camera_id or self._camera_id
+        return self._env.physics.render(height=height, width=width, camera_id=camera_id)
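`DMCWrapper._convert_action` above affinely rescales a normalized action in [-1, 1] onto the true dm_control bounds. A standalone numeric sketch of that mapping with made-up bounds:

```python
import numpy as np

true_low, true_high = np.array([0.0]), np.array([4.0])    # illustrative true bounds
norm_low, norm_high = np.array([-1.0]), np.array([1.0])   # normalized action space

action = np.array([0.5])                                  # agent action in [-1, 1]
action = (action - norm_low) / (norm_high - norm_low)     # -> [0, 1], here 0.75
action = action * (true_high - true_low) + true_low       # -> true bounds, here 3.0
assert np.allclose(action, 3.0)
```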
@@ -1,20 +1,22 @@
+import logging
 from typing import Iterable, List, Type

 import gym

-from mp_env_api.envs.mp_env_wrapper import MPEnvWrapper
+from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
 from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
 from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper


-def make_env(env_id: str, seed: int, rank: int = 0):
+def make_env_rank(env_id: str, seed: int, rank: int = 0):
     """
-    Create a new gym environment with given seed.
+    TODO: Do we need this?
+    Generate a callable to create a new gym environment with a given seed.
     The rank is added to the seed and can be used for example when using vector environments.
-    E.g. [make_env("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
+    E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
     with seeds 123 through 130.
     Hence, testing environments should be seeded with a value which is offset by the number of training environments.
-    Here e.g. [make_env("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
+    Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments

     Args:
         env_id: name of the environment
@@ -24,18 +26,34 @@ def make_env(env_id: str, seed: int, rank: int = 0):
     Returns:

     """
-    env = gym.make(env_id)
-    env.seed(seed + rank)
-    return lambda: env
+    return lambda: make_env(env_id, seed + rank)


-def make_contextual_env(env_id, context, seed, rank):
-    env = gym.make(env_id, context=context)
-    env.seed(seed + rank)
-    return lambda: env
+def make_env(env_id: str, seed, **kwargs):
+    """
+    Converts an env_id to an environment with the gym API.
+    This also works for DeepMind Control Suite env_wrappers
+    for which domain name and task name are expected to be separated by "-".
+    Args:
+        env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
+        **kwargs: Additional kwargs for the constructor such as pixel observations, etc.
+
+    Returns: Gym environment
+
+    """
+    try:
+        # Gym
+        env = gym.make(env_id, **kwargs)
+        env.seed(seed)
+    except gym.error.Error:
+        # DMC
+        from alr_envs.utils import make
+        env = make(env_id, seed=seed, **kwargs)
+
+    return env


-def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]]):
+def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
     """
     Helper function for creating a wrapped gym environment using MPs.
     It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
@@ -44,36 +62,40 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]]):
     Args:
         env_id: name of the environment
         wrappers: list of wrappers (at least an MPEnvWrapper),
+        seed: seed of environment

     Returns: gym environment with all specified wrappers applied

     """
-    _env = gym.make(env_id)
+    # _env = gym.make(env_id)
+    _env = make_env(env_id, seed, **kwargs)

-    assert any(issubclass(w, MPEnvWrapper) for w in wrappers)
+    assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
+        "At least an MPEnvWrapper is required in order to leverage motion primitive environments."
     for w in wrappers:
         _env = w(_env)

     return _env


-def make_dmp_env(env_id: str, wrappers: Iterable, **mp_kwargs):
+def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, **mp_kwargs):
     """
     This can also be used standalone for manually building a custom DMP environment.
     Args:
         env_id: base_env_name,
         wrappers: list of wrappers (at least an MPEnvWrapper),
+        seed: seed of environment
         mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP

     Returns: DMP wrapped gym env

     """

-    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers)
+    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed)
     return DmpWrapper(_env, **mp_kwargs)


-def make_detpmp_env(env_id: str, wrappers: Iterable, **mp_kwargs):
+def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, **mp_kwargs):
     """
     This can also be used standalone for manually building a custom Det ProMP environment.
     Args:
@@ -85,7 +107,7 @@ def make_detpmp_env(env_id: str, wrappers: Iterable, **mp_kwargs):

     """

-    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers)
+    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed)
     return DetPMPWrapper(_env, **mp_kwargs)

@@ -122,3 +144,9 @@ def make_detpmp_env_helper(**kwargs):

     """
     return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), **kwargs.get("mp_kwargs"))
+
+
+def make_contextual_env(env_id, context, seed, rank):
+    env = gym.make(env_id, context=context)
+    env.seed(seed + rank)
+    return lambda: env
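With these changes, `make_env` dispatches on the id: plain gym ids resolve via `gym.make`, and ids that raise a `gym.error.Error` fall back to the DMC constructor, while `make_env_rank` now returns a callable. A short sketch using ids that appear elsewhere in this commit:

```python
from alr_envs.utils.make_env_helpers import make_env, make_env_rank

gym_env = make_env("alr_envs:ALRReacher-v0", seed=1)   # resolved via gym.make
dmc_env = make_env("ball_in_cup-catch", seed=1)        # falls back to alr_envs.utils.make

# make_env_rank returns a callable, e.g. for vector environments:
env_fns = [make_env_rank("alr_envs:HoleReacherDMP-v0", seed=123, rank=i) for i in range(4)]
```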
example.py (deleted, 117 lines)
@@ -1,117 +0,0 @@
-from collections import defaultdict
-
-import gym
-import numpy as np
-
-from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist
-
-
-def example_mujoco():
-    env = gym.make('alr_envs:ALRReacher-v0')
-    rewards = 0
-    obs = env.reset()
-
-    # number of environment steps
-    for i in range(10000):
-        obs, reward, done, info = env.step(env.action_space.sample())
-        rewards += reward
-
-        # if i % 1 == 0:
-        #     env.render()
-
-        if done:
-            print(rewards)
-            rewards = 0
-            obs = env.reset()
-
-
-def example_mp(env_name="alr_envs:HoleReacherDMP-v1"):
-    env = gym.make(env_name)
-    rewards = 0
-    # env.render(mode=None)
-    obs = env.reset()
-
-    # number of samples/full trajectories (multiple environment steps)
-    for i in range(10):
-        obs, reward, done, info = env.step(env.action_space.sample())
-        rewards += reward
-
-        if i % 1 == 0:
-            # render full DMP trajectory
-            # render can only be called once in the beginning as well. That would render every trajectory
-            # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering.
-            env.render(mode="human")
-
-        if done:
-            print(rewards)
-            rewards = 0
-            obs = env.reset()
-
-
-def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)):
-    def make_env(env_id, seed, rank):
-        env = gym.make(env_id)
-        env.seed(seed + rank)
-        return lambda: env
-
-    def sample(env: gym.vector.VectorEnv, n_samples=100):
-        # for plotting
-        rewards = np.zeros(n_cpu)
-
-        # this would generate more samples than requested if n_samples % num_envs != 0
-        repeat = int(np.ceil(n_samples / env.num_envs))
-        vals = defaultdict(list)
-        for i in range(repeat):
-            obs, reward, done, info = envs.step(envs.action_space.sample())
-            vals['obs'].append(obs)
-            vals['reward'].append(reward)
-            vals['done'].append(done)
-            vals['info'].append(info)
-            rewards += reward
-            if np.any(done):
-                print(rewards[done])
-                rewards[done] = 0
-
-        # do not return values above threshold
-        return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)
-
-    envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed, i) for i in range(n_cpu)])
-
-    obs = envs.reset()
-    print(sample(envs, 16))
-
-
-def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4):
-    n_samples = 10
-
-    sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
-    dim = sampler.env.action_space.spaces[0].shape[0]
-
-    thetas = np.random.randn(n_samples, dim)  # usually form a search distribution
-
-    _, rewards, __, ___ = sampler(thetas)
-
-    print(rewards)
-
-
-def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4):
-    sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu)
-    dim = sampler.env.action_space.spaces[0].shape[0]
-    dist = DummyDist(dim)  # needs a sample function
-
-    n_samples = 10
-    new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples)
-
-    print(new_rewards)
-
-
-if __name__ == '__main__':
-    example_mp("alr_envs:HoleReacherDetPMP-v0")
-    # example_mujoco()
-    # example_mp("alr_envs:SimpleReacherDMP-v1")
-    # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
-    # example_async_contextual_sampler()
-    # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
-    # env_name = "alr_envs:ALRBallInACupPDSimpleDetPMP-v0"
-    # example_async_sampler(env_name)
-    # example_mp(env_name)