Merge pull request #12 from ALRhub/metaworld_integration

Metaworld integration
This commit is contained in:
ottofabian 2021-08-20 14:47:16 +02:00 committed by GitHub
commit 45ca0308c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 884 additions and 60 deletions

View File

@ -1,12 +1,12 @@
from gym.envs.registration import register
from gym.wrappers import FlattenObservation
from alr_envs import classic_control, dmc, open_ai
from alr_envs import classic_control, dmc, open_ai, meta
from alr_envs.utils.make_env_helpers import make_dmp_env
from alr_envs.utils.make_env_helpers import make_detpmp_env
from alr_envs.utils.make_env_helpers import make_env
from alr_envs.utils.make_env_helpers import make_env_rank
from alr_envs.utils.make_env_helpers import make
from alr_envs.utils.make_env_helpers import make_rank
# Mujoco
@ -305,18 +305,17 @@ register(
# max_episode_steps=1,
kwargs={
"name": f"ball_in_cup-catch",
"time_limit": 1,
"episode_length": 50,
"time_limit": 2,
"episode_length": 100,
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
"mp_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 1,
"duration": 2,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "motor",
"weights_scale": 50,
"goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 50,
@ -331,16 +330,15 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": f"ball_in_cup-catch",
"time_limit": 1,
"episode_length": 50,
"time_limit": 2,
"episode_length": 100,
"wrappers": [dmc.suite.ball_in_cup.MPWrapper],
"mp_kwargs": {
"num_dof": 2,
"num_basis": 5,
"duration": 1,
"duration": 2,
"width": 0.025,
"policy_type": "motor",
"weights_scale": 0.2,
"zero_start": True,
"policy_kwargs": {
"p_gains": 50,
@ -828,6 +826,7 @@ register(
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"zero_start": True,
"policy_type": "motor",
"policy_kwargs": {
"p_gains": 1.,
@ -849,6 +848,7 @@ register(
"duration": 1,
"post_traj_time": 0,
"width": 0.02,
"zero_start": True,
"policy_type": "motor",
"policy_kwargs": {
"p_gains": .6,
@ -870,6 +870,25 @@ register(
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"zero_start": True,
"policy_type": "position"
}
}
)
register(
id='FetchSlideDetPMP-v1',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={
"name": "gym.envs.robotics:FetchSlide-v1",
"wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
"mp_kwargs": {
"num_dof": 4,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"zero_start": True,
"policy_type": "position"
}
}
@ -887,7 +906,127 @@ register(
"duration": 2,
"post_traj_time": 0,
"width": 0.02,
"zero_start": True,
"policy_type": "position"
}
}
)
# Registers the deterministic ProMP (DetPMP) variant of the OpenAI Gym robotics task FetchReach-v1.
register(
    id='FetchReachDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        # Id of the underlying environment handed to the helper.
        "name": "gym.envs.robotics:FetchReach-v1",
        # Wrappers passed to the helper in this order.
        # NOTE(review): FlattenObservation presumably flattens the Dict observation of the
        # Fetch tasks before the MPWrapper sees it - confirm against the wrapper code.
        "wrappers": [FlattenObservation, open_ai.robotics.fetch.MPWrapper],
        # Settings for the motion primitive wrapper.
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "zero_start": True,
            "policy_type": "position"
        }
    }
)
# MetaWorld
# All MetaWorld tasks are registered as deterministic ProMP (DetPMP) environments with the same
# motion primitive settings. The only thing that differs between the groups below is the
# MPWrapper from `alr_envs.meta` that is applied to the task.
goal_change_envs = ["assembly-v2", "pick-out-of-hole-v2", "plate-slide-v2", "plate-slide-back-v2"]

object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"]

goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2",
                               "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2",
                               "coffee-push-v2", "dial-turn-v2", "disassemble-v2", "door-close-v2",
                               "door-lock-v2", "door-open-v2", "door-unlock-v2", "hand-insert-v2",
                               "drawer-close-v2", "drawer-open-v2", "faucet-open-v2", "faucet-close-v2",
                               "handle-press-side-v2", "handle-press-v2", "handle-pull-side-v2",
                               "handle-pull-v2", "lever-pull-v2", "peg-insert-side-v2", "pick-place-wall-v2",
                               "reach-v2", "push-back-v2", "push-v2", "pick-place-v2", "peg-unplug-side-v2",
                               "soccer-v2", "stick-push-v2", "stick-pull-v2", "push-wall-v2", "reach-wall-v2",
                               "shelf-place-v2", "sweep-v2", "window-open-v2", "window-close-v2"
                               ]

goal_and_endeffector_change_envs = ["basketball-v2"]


def _register_metaworld_detpmp(env_ids, wrapper):
    """
    Register one DetPMP environment for every MetaWorld task id in `env_ids`.

    A task id such as `button-press-v2` is registered as `ButtonPressDetPMP-v2`: every part
    before the last dash is capitalized and joined, the last part is kept as version suffix.

    Args:
        env_ids: iterable of MetaWorld task ids of the form `task-name-v2`
        wrapper: MPWrapper class that is applied to each of these tasks
    """
    for env_id in env_ids:
        *task_words, version = env_id.split("-")
        name = "".join(word.capitalize() for word in task_words)
        register(
            id=f'{name}DetPMP-{version}',
            entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
            kwargs={
                "name": env_id,
                "wrappers": [wrapper],
                # A fresh dict per registration, so that no settings are shared between environments.
                "mp_kwargs": {
                    "num_dof": 4,
                    "num_basis": 5,
                    "duration": 6.25,
                    "post_traj_time": 0,
                    "width": 0.025,
                    "zero_start": True,
                    "policy_type": "metaworld",
                }
            }
        )


# Same registration order as before: goal, object, goal + object, goal + end-effector.
_register_metaworld_detpmp(goal_change_envs, meta.goal_change.MPWrapper)
_register_metaworld_detpmp(object_change_envs, meta.object_change.MPWrapper)
_register_metaworld_detpmp(goal_and_object_change_envs, meta.goal_and_object_change.MPWrapper)
_register_metaworld_detpmp(goal_and_endeffector_change_envs, meta.goal_and_endeffector_change.MPWrapper)

3
alr_envs/dmc/README.MD Normal file
View File

@ -0,0 +1,3 @@
# DeepMind Control (DMC) Wrappers
These are the Environment Wrappers for selected [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) environments in order to use our Motion Primitive gym interface with them.

View File

@ -17,7 +17,7 @@ def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
Returns:
"""
env = alr_envs.make_env(env_id, seed)
env = alr_envs.make(env_id, seed)
rewards = 0
obs = env.reset()
print("observation shape:", env.observation_space.shape)

View File

@ -21,7 +21,7 @@ def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
"""
env = alr_envs.make_env(env_id, seed)
env = alr_envs.make(env_id, seed)
rewards = 0
obs = env.reset()
print("Observation shape: ", env.observation_space.shape)
@ -56,7 +56,7 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
Returns: Tuple of (obs, reward, done, info) with type np.ndarray
"""
env = gym.vector.AsyncVectorEnv([alr_envs.make_env_rank(env_id, seed, i) for i in range(n_cpu)])
env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)])
# OR
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
@ -80,20 +80,21 @@ def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16
rewards[done] = 0
# do not return values above threshold
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
return (*map(lambda v: np.stack(v)[:n_samples], buffer.values()),)
if __name__ == '__main__':
render = False
render = True
# Basic gym task
example_general("Pendulum-v0", seed=10, iterations=200, render=render)
#
# # Basis task from framework
example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=render)
#
# # OpenAI Mujoco task
example_general("HalfCheetah-v2", seed=10, render=render)
#
# # Mujoco task from framework
example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render)

View File

@ -0,0 +1,128 @@
import alr_envs
from alr_envs.meta.goal_and_object_change import MPWrapper
def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
    """
    Example for running a MetaWorld based env in the step based setting with random actions.
    The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
    return the observable goal version.
    All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/

    Args:
        env_id: `task_name-v2`.
            NOTE(review): the default "fish-swim" does not follow the `task_name-v2` pattern and looks
            like a leftover from the DMC example - pass a MetaWorld id explicitly.
        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:

    """
    env = alr_envs.make(env_id, seed)
    # Make sure the environment is closed even if a step or the rendering fails.
    try:
        rewards = 0
        env.reset()
        print("observation shape:", env.observation_space.shape)
        print("action shape:", env.action_space.shape)

        for _ in range(iterations):
            ac = env.action_space.sample()
            _, reward, done, _ = env.step(ac)
            rewards += reward

            if render:
                # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
                # TODO: Remove this, when Metaworld fixes its interface.
                env.render(False)

            if done:
                # Report the return of the finished episode and start a new one.
                print(env_id, rewards)
                rewards = 0
                env.reset()
    finally:
        env.close()
def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
    """
    Example for running a custom motion primitive based environment.
    Our already registered environments follow the same structure.
    Hence, this also allows to adjust hyperparameters of the motion primitives.
    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
        iterations: Number of rollout steps to run
        render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)

    Returns:

    Raises:
        ValueError: if `render` is True, because rendering is currently not possible (see message).

    """
    # Rendering the full MP trajectory would only require calling render() once in the beginning,
    # which renders every consecutive trajectory. Resetting to no rendering can be achieved by
    # render(mode=None). It is also possible to change the mode multiple times when
    # e.g. only every nth trajectory should be displayed.
    # This is currently not possible for MetaWorld, hence fail before an environment is created.
    if render:
        raise ValueError("A bug in the Metaworld render interface currently does not allow calling render(). "
                         "A temporary workaround is to alter their code in MujocoEnv render() from "
                         "`if not offscreen` to `if not offscreen or offscreen == 'human'`.")
        # TODO: Remove this, when Metaworld fixes its interface.
        # env.render(mode="human")

    # Base MetaWorld name, according to structure of above example
    base_env = "button-press-v2"

    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
    # You can also add other gym.Wrappers in case they are needed.
    wrappers = [MPWrapper]
    mp_kwargs = {
        "num_dof": 4,
        "num_basis": 5,
        "duration": 6.25,
        "post_traj_time": 0,
        "width": 0.025,
        "zero_start": True,
        "policy_type": "metaworld",
    }

    env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
    # OR for a DMP:
    # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)

    # Make sure the environment is closed even if a rollout fails.
    try:
        rewards = 0
        env.reset()

        # number of samples/full trajectories (multiple environment steps)
        for _ in range(iterations):
            ac = env.action_space.sample()
            _, reward, done, _ = env.step(ac)
            rewards += reward

            if done:
                print(base_env, rewards)
                rewards = 0
                env.reset()
    finally:
        env.close()
if __name__ == '__main__':
    # Disclaimer: MetaWorld environments require the seed to be specified in the beginning.
    # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.

    # For rendering it might be necessary to specify your OpenGL installation
    # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
    render = False

    # First the plain MetaWorld task, then the MP + MetaWorld hybrid task provided by our framework.
    for task_id, n_steps in (("button-press-v2", 500), ("ButtonPressDetPMP-v2", 1)):
        example_dmc(task_id, seed=10, iterations=n_steps, render=render)

    # Custom MetaWorld task
    example_custom_dmc_and_mp(seed=10, iterations=1, render=render)

View File

@ -1,5 +1,4 @@
from alr_envs import MPWrapper
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
import alr_envs
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
@ -16,7 +15,7 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, rend
"""
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
env = make_env(env_name, seed)
env = alr_envs.make(env_name, seed)
rewards = 0
# env.render(mode=None)
@ -71,7 +70,7 @@ def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=
"weights_scale": 50,
"goal_scale": 0.1
}
env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs)
# This time rendering every trajectory
if render:
@ -113,7 +112,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed.
wrappers = [MPWrapper]
wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper]
mp_kwargs = {
"num_dof": 5,
"num_basis": 5,
@ -125,7 +124,7 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
"weights_scale": 50,
"goal_scale": 0.1
}
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP:
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)

View File

@ -1,4 +1,4 @@
from alr_envs.utils.make_env_helpers import make_env
import alr_envs
def example_mp(env_name, seed=1):
@ -13,7 +13,7 @@ def example_mp(env_name, seed=1):
"""
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
env = make_env(env_name, seed)
env = alr_envs.make(env_name, seed)
rewards = 0
obs = env.reset()
@ -29,13 +29,13 @@ def example_mp(env_name, seed=1):
rewards = 0
obs = env.reset()
if __name__ == '__main__':
# DMP - not supported yet
#example_mp("ReacherDetPMP-v2")
# example_mp("ReacherDMP-v2")
# DetProMP
example_mp("ContinuousMountainCarDetPMP-v0")
example_mp("ReacherDetPMP-v2")
example_mp("FetchReachDenseDetPMP-v1")
example_mp("FetchSlideDenseDetPMP-v1")

View File

@ -1,30 +1,30 @@
import numpy as np
from matplotlib import pyplot as plt
from alr_envs import dmc
from alr_envs import dmc, meta
from alr_envs.utils.make_env_helpers import make_detpmp_env
# This might work for some environments, however, please verify either way the correct trajectory information
# for your environment are extracted below
SEED = 10
env_id = "cartpole-swingup"
wrappers = [dmc.suite.cartpole.MPWrapper]
env_id = "ball_in_cup-catch"
# The DMC MP wrappers are addressed via the `suite` subpackage, as in the environment registration.
wrappers = [dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {
"num_dof": 1,
"num_basis": 5,
"num_dof": 2,
"num_basis": 10,
"duration": 2,
"width": 0.025,
"policy_type": "motor",
"weights_scale": 0.2,
"weights_scale": 1,
"zero_start": True,
"policy_kwargs": {
"p_gains": 10,
"d_gains": 10 # a good starting point is the sqrt of p_gains
"p_gains": 1,
"d_gains": 1
}
}
kwargs = dict(time_limit=2, episode_length=200)
kwargs = dict(time_limit=2, episode_length=100)
env = make_detpmp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs,
**kwargs)
@ -35,7 +35,6 @@ pos, vel = env.mp_rollout(env.action_space.sample())
base_shape = env.full_action_space.shape
actual_pos = np.zeros((len(pos), *base_shape))
actual_pos_ball = np.zeros((len(pos), *base_shape))
actual_vel = np.zeros((len(pos), *base_shape))
act = np.zeros((len(pos), *base_shape))
@ -46,7 +45,6 @@ for t, pos_vel in enumerate(zip(pos, vel)):
act[t, :] = actions
# TODO verify for your environment
actual_pos[t, :] = env.current_pos
# actual_pos_ball[t, :] = env.physics.data.qpos[2:]
actual_vel[t, :] = env.current_vel
plt.figure(figsize=(15, 5))

26
alr_envs/meta/README.MD Normal file
View File

@ -0,0 +1,26 @@
# MetaWorld Wrappers
These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Motion Primitive gym interface with them.
All Metaworld environments have a 39 dimensional observation space with the same structure. The tasks differ only in the objective and the initial observations that are randomized.
Unused observations are zeroed out. For example, the observation mask for `Button-Press-v2` looks as follows:
```python
return np.hstack([
# Current observation
[False] * 3, # end-effector position
[False] * 1, # normalized gripper open distance
[True] * 3, # main object position
[False] * 4, # main object quaternion
[False] * 3, # secondary object position
[False] * 4, # secondary object quaternion
# Previous observation
[False] * 3, # previous end-effector position
[False] * 1, # previous normalized gripper open distance
[False] * 3, # previous main object position
[False] * 4, # previous main object quaternion
[False] * 3, # previous second object position
[False] * 4, # previous second object quaternion
# Goal
[True] * 3, # goal position
])
```
For other tasks only the boolean values have to be adjusted accordingly.

View File

@ -0,0 +1 @@
from alr_envs.meta import goal_and_object_change, goal_and_endeffector_change, goal_change, object_change

View File

@ -0,0 +1,68 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """
    MP wrapper for MetaWorld environments in which the end-effector position and the goal position
    are the observation entries that differ between two resets.

    You can verify this by executing the code below for your environment id and check if the output is non-zero
    at the same indices.
    ```python
    import alr_envs
    env = alr_envs.make(env_id, 1)
    print(env.reset() - env.reset())
    array([ !=0 ,  !=0 ,  !=0 ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  !=0 ,  !=0 ,
            !=0 ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  !=0 ,  !=0 ,  !=0])
    ```
    """

    @property
    def active_obs(self):
        # One (length, active) pair per segment of the 39 dimensional observation.
        # This structure is the same for all metaworld environments.
        # Only the observations which change could differ
        segments = (
            # Current observation
            (3, True),   # end-effector position
            (1, False),  # normalized gripper open distance
            (3, False),  # main object position
            (4, False),  # main object quaternion
            (3, False),  # secondary object position
            (4, False),  # secondary object quaternion
            # Previous observation
            # TODO: Include previous values? According to their source they might be wrong for the first iteration.
            (3, False),  # previous end-effector position
            (1, False),  # previous normalized gripper open distance
            (3, False),  # previous main object position
            (4, False),  # previous main object quaternion
            (3, False),  # previous second object position
            (4, False),  # previous second object quaternion
            # Goal
            (3, True),   # goal position
        )
        lengths = [length for length, _ in segments]
        active = [flag for _, flag in segments]
        # Expand every flag to the length of its segment -> boolean mask with one entry per observation.
        return np.repeat(active, lengths)

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # Mocap position followed by the position of the "r_close" joint.
        r_close = self.env.data.get_joint_qpos("r_close")
        mocap_pos = self.env.data.mocap_pos.flatten()
        return np.hstack([mocap_pos, r_close])

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        raise NotImplementedError("Velocity cannot be retrieved.")

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        # Step size of the wrapped environment.
        return self.env.dt

View File

@ -0,0 +1,68 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """
    MP wrapper for MetaWorld environments in which the main object position and the goal position
    are the observation entries that differ between two resets.

    You can verify this by executing the code below for your environment id and check if the output is non-zero
    at the same indices.
    ```python
    import alr_envs
    env = alr_envs.make(env_id, 1)
    print(env.reset() - env.reset())
    array([ 0.  ,  0.  ,  0.  ,  0.  ,  !=0 ,
            !=0 ,  !=0 ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  !=0 ,  !=0 ,  !=0 ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  !=0 ,  !=0 ,  !=0])
    ```
    """

    @property
    def active_obs(self):
        # One (length, active) pair per segment of the 39 dimensional observation.
        # This structure is the same for all metaworld environments.
        # Only the observations which change could differ
        segments = (
            # Current observation
            (3, False),  # end-effector position
            (1, False),  # normalized gripper open distance
            (3, True),   # main object position
            (4, False),  # main object quaternion
            (3, False),  # secondary object position
            (4, False),  # secondary object quaternion
            # Previous observation
            # TODO: Include previous values? According to their source they might be wrong for the first iteration.
            (3, False),  # previous end-effector position
            (1, False),  # previous normalized gripper open distance
            (3, False),  # previous main object position
            (4, False),  # previous main object quaternion
            (3, False),  # previous second object position
            (4, False),  # previous second object quaternion
            # Goal
            (3, True),   # goal position
        )
        lengths = [length for length, _ in segments]
        active = [flag for _, flag in segments]
        # Expand every flag to the length of its segment -> boolean mask with one entry per observation.
        return np.repeat(active, lengths)

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # Mocap position followed by the position of the "r_close" joint.
        r_close = self.env.data.get_joint_qpos("r_close")
        mocap_pos = self.env.data.mocap_pos.flatten()
        return np.hstack([mocap_pos, r_close])

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        raise NotImplementedError("Velocity cannot be retrieved.")

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        # Step size of the wrapped environment.
        return self.env.dt

View File

@ -0,0 +1,68 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """
    MP wrapper for MetaWorld environments in which only the goal position differs between two resets,
    i.e. neither objects nor the end-effector are altered at the start of an episode.

    You can verify this by executing the code below for your environment id and check if the output is non-zero
    at the same indices.
    ```python
    import alr_envs
    env = alr_envs.make(env_id, 1)
    print(env.reset() - env.reset())
    array([ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  !=0 ,  !=0 ,  !=0])
    ```
    """

    @property
    def active_obs(self):
        # One (length, active) pair per segment of the 39 dimensional observation.
        # This structure is the same for all metaworld environments.
        # Only the observations which change could differ
        segments = (
            # Current observation
            (3, False),  # end-effector position
            (1, False),  # normalized gripper open distance
            (3, False),  # main object position
            (4, False),  # main object quaternion
            (3, False),  # secondary object position
            (4, False),  # secondary object quaternion
            # Previous observation
            # TODO: Include previous values? According to their source they might be wrong for the first iteration.
            (3, False),  # previous end-effector position
            (1, False),  # previous normalized gripper open distance
            (3, False),  # previous main object position
            (4, False),  # previous main object quaternion
            (3, False),  # previous second object position
            (4, False),  # previous second object quaternion
            # Goal
            (3, True),   # goal position
        )
        lengths = [length for length, _ in segments]
        active = [flag for _, flag in segments]
        # Expand every flag to the length of its segment -> boolean mask with one entry per observation.
        return np.repeat(active, lengths)

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # Mocap position followed by the position of the "r_close" joint.
        r_close = self.env.data.get_joint_qpos("r_close")
        mocap_pos = self.env.data.mocap_pos.flatten()
        return np.hstack([mocap_pos, r_close])

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        raise NotImplementedError("Velocity cannot be retrieved.")

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        # Step size of the wrapped environment.
        return self.env.dt

View File

@ -0,0 +1,68 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """
    MP wrapper for MetaWorld environments in which only the main object position differs between two
    resets, while the goal and the end-effector stay the same at the start of an episode.

    You can verify this by executing the code below for your environment id and check if the output is non-zero
    at the same indices.
    ```python
    import alr_envs
    env = alr_envs.make(env_id, 1)
    print(env.reset() - env.reset())
    array([ 0.  ,  0.  ,  0.  ,  0.  ,  !=0 ,
            !=0 ,  !=0 ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
            0.  ,  0.  ,  0.  ,  0.])
    ```
    """

    @property
    def active_obs(self):
        # This structure is the same for all metaworld environments.
        # Only the observations which change could differ.
        # The mask follows the reset difference documented on the class: the main object position
        # varies, the goal does not. Previously the goal was active and the object was masked out,
        # which made this mask identical to the goal-only wrapper.
        return np.hstack([
            # Current observation
            [False] * 3,  # end-effector position
            [False] * 1,  # normalized gripper open distance
            [True] * 3,  # main object position
            [False] * 4,  # main object quaternion
            [False] * 3,  # secondary object position
            [False] * 4,  # secondary object quaternion
            # Previous observation
            # TODO: Include previous values? According to their source they might be wrong for the first iteration.
            [False] * 3,  # previous end-effector position
            [False] * 1,  # previous normalized gripper open distance
            [False] * 3,  # previous main object position
            [False] * 4,  # previous main object quaternion
            [False] * 3,  # previous second object position
            [False] * 4,  # previous second object quaternion
            # Goal
            [False] * 3,  # goal position
        ])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # Mocap position followed by the position of the "r_close" joint.
        r_close = self.env.data.get_joint_qpos("r_close")
        return np.hstack([self.env.data.mocap_pos.flatten(), r_close])

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        raise NotImplementedError("Velocity cannot be retrieved.")

    @property
    def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        # Step size of the wrapped environment.
        return self.env.dt

View File

@ -0,0 +1,3 @@
# OpenAI Gym Wrappers
These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments in order to use our Motion Primitive gym interface with them.

View File

@ -4,8 +4,10 @@ from typing import Union
import gym
from gym.envs.registration import register
from alr_envs.utils.make_env_helpers import make
def make(
def make_dmc(
id: str,
seed: int = 1,
visualize_reward: bool = True,

View File

@ -3,21 +3,22 @@ from typing import Iterable, List, Type, Union
import gym
import numpy as np
from gym.envs.registration import EnvSpec
from mp_env_api import MPEnvWrapper
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
"""
TODO: Do we need this?
Generate a callable to create a new gym environment with a given seed.
The rank is added to the seed and can be used for example when using vector environments.
E.g. [make_env_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments
with seeds 123 through 130.
Hence, testing environments should be seeded with a value which is offset by the number of training environments.
Here e.g. [make_env_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns
Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environments
Args:
env_id: name of the environment
@ -30,12 +31,12 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, *
"""
def f():
return make_env(env_id, seed + rank, **kwargs)
return make(env_id, seed + rank, **kwargs)
return f if return_callable else f()
def make_env(env_id: str, seed, **kwargs):
def make(env_id: str, seed, **kwargs):
"""
Converts an env_id to an environment with the gym API.
This also works for DeepMind Control Suite interface_wrappers
@ -58,13 +59,30 @@ def make_env(env_id: str, seed, **kwargs):
env.action_space.seed(seed)
env.observation_space.seed(seed)
except gym.error.Error:
# DMC
from alr_envs.utils import make
env = make(env_id, seed=seed, **kwargs)
assert env.base_step_limit == env.spec.max_episode_steps, \
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
f"the DMC environment specification of {env.base_step_limit} steps."
# MetaWorld env
import metaworld
if env_id in metaworld.ML1.ENV_NAMES:
env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs)
# setting this avoids generating the same initialization after each reset
env._freeze_rand_vec = False
# Manually set spec, as metaworld environments are not registered via gym
env.unwrapped.spec = EnvSpec(env_id)
# Set Timelimit based on the maximum allowed path length of the environment
env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length)
env.seed(seed)
env.action_space.seed(seed)
env.observation_space.seed(seed)
env.goal_space.seed(seed)
else:
# DMC
from alr_envs.utils import make_dmc
env = make_dmc(env_id, seed=seed, **kwargs)
assert env.base_step_limit == env.spec.max_episode_steps, \
f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \
f"the DMC environment specification of {env.base_step_limit} steps."
return env
@ -84,7 +102,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
"""
# _env = gym.make(env_id)
_env = make_env(env_id, seed, **kwargs)
_env = make(env_id, seed, **kwargs)
assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
"At least one MPEnvWrapper is required in order to leverage motion primitive environments."
@ -175,7 +193,7 @@ def make_detpmp_env_helper(**kwargs):
def make_contextual_env(env_id, context, seed, rank):
env = make_env(env_id, seed + rank, context=context)
env = make(env_id, seed + rank, context=context)
# env = gym.make(env_id, context=context)
# env.seed(seed + rank)
return lambda: env

View File

@ -3,7 +3,7 @@ from gym.vector.async_vector_env import AsyncVectorEnv
import numpy as np
from _collections import defaultdict
from alr_envs.utils.make_env_helpers import make_env_rank
from alr_envs.utils.make_env_helpers import make_rank
def split_array(ary, size):
@ -54,7 +54,7 @@ class AlrMpEnvSampler:
def __init__(self, env_id, num_envs, seed=0, **env_kwargs):
self.num_envs = num_envs
self.env = AsyncVectorEnv([make_env_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
self.env = AsyncVectorEnv([make_rank(env_id, seed, i, **env_kwargs) for i in range(num_envs)])
def __call__(self, params):
params = np.atleast_2d(params)

View File

@ -12,6 +12,7 @@ setup(
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
'mujoco-py<2.1,>=2.0',
'dm_control',
'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld',
],
url='https://github.com/ALRhub/alr_envs/',

127
test/test_dmc_envs.py Normal file
View File

@ -0,0 +1,127 @@
import unittest
import gym
import numpy as np
from dm_control import suite, manipulation
from alr_envs import make
DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
SEED = 1
class TestEnvironments(unittest.TestCase):
    """Smoke and determinism tests for the DMC suite and manipulation environments."""

    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
        """
        Roll out a DMC based env in the step based setting using random actions.

        The env_id has to be specified as `domain_name-task_name` or
        for manipulation tasks as `manipulation-environment_name`.

        Args:
            env_id: Either `domain_name-task_name` or `manipulation-environment_name`
            iterations: Number of rollout steps to run. If None, `env.spec.max_episode_steps`
                is used, or a single step when the spec does not define an episode length.
            seed: random seeding
            render: Render the episode

        Returns:
            Tuple `(observations, rewards, dones)` of numpy arrays. `observations` has one more
            entry than the other two because the observation following the last step is appended.
        """
        env: gym.Env = make(env_id, seed=seed)
        rewards = []
        observations = []
        dones = []
        # Defined up front so the final check fails with a readable message
        # (instead of a NameError) when no step is executed at all.
        done = False

        # try/finally guarantees the simulator is released even if a verification fails mid-rollout.
        try:
            obs = env.reset()
            self._verify_observations(obs, env.observation_space, "reset()")

            length = env.spec.max_episode_steps
            if iterations is None:
                iterations = 1 if length is None else length

            # number of samples(multiple environment steps)
            for _ in range(iterations):
                observations.append(obs)

                ac = env.action_space.sample()
                obs, reward, done, info = env.step(ac)

                self._verify_observations(obs, env.observation_space, "step()")
                self._verify_reward(reward)
                self._verify_done(done)

                rewards.append(reward)
                dones.append(done)

                if render:
                    env.render("human")

                if done:
                    obs = env.reset()

            # unittest assertion instead of a bare `assert`, which is stripped when running with -O.
            self.assertTrue(done, "Done flag is not True after max episode length.")
            observations.append(obs)
        finally:
            env.close()

        return np.array(observations), np.array(rewards), np.array(dones)

    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
        """Check that `obs` lies within `observation_space`; `obs_type` names the producing call."""
        self.assertTrue(observation_space.contains(obs),
                        f"Observation {obs} received from {obs_type} "
                        f"not contained in observation space {observation_space}.")

    def _verify_reward(self, reward):
        """Check that the reward is a float."""
        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")

    def _verify_done(self, done):
        """Check that the done flag is a bool."""
        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")

    def _assert_deterministic(self, env_ids, seed=0):
        """Run every env in `env_ids` twice with the same seed and compare both trajectories step by step."""
        for env_id in env_ids:
            with self.subTest(msg=env_id):
                traj1 = self._run_env(env_id, seed=seed)
                traj2 = self._run_env(env_id, seed=seed)
                for i, time_step in enumerate(zip(*traj1, *traj2)):
                    obs1, rwd1, done1, obs2, rwd2, done2 = time_step
                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.")
                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")

    def test_dmc_functionality(self):
        """Tests that environments runs without errors using random actions."""
        for env_id in DMC_ENVS:
            with self.subTest(msg=env_id):
                self._run_env(env_id)

    def test_dmc_determinism(self):
        """Tests that identical seeds produce identical trajectories."""
        self._assert_deterministic(DMC_ENVS, seed=0)

    def test_manipulation_functionality(self):
        """Tests that environments runs without errors using random actions."""
        for env_id in MANIPULATION_SPECS:
            with self.subTest(msg=env_id):
                self._run_env(env_id)

    def test_manipulation_determinism(self):
        """Tests that identical seeds produce identical trajectories."""
        self._assert_deterministic(MANIPULATION_SPECS, seed=0)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

View File

@ -4,7 +4,7 @@ import gym
import numpy as np
import alr_envs # noqa
from alr_envs.utils.make_env_helpers import make_env
from alr_envs.utils.make_env_helpers import make
ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
SEED = 1
@ -27,7 +27,7 @@ class TestEnvironments(unittest.TestCase):
Returns:
"""
env: gym.Env = make_env(env_id, seed=seed)
env: gym.Env = make(env_id, seed=seed)
rewards = []
observations = []
dones = []
@ -62,6 +62,7 @@ class TestEnvironments(unittest.TestCase):
if done:
obs = env.reset()
assert done, "Done flag is not True after max episode length."
observations.append(obs)
env.close()
del env
@ -81,7 +82,6 @@ class TestEnvironments(unittest.TestCase):
def test_environment_functionality(self):
"""Tests that environments runs without errors using random actions."""
for spec in ALL_SPECS:
# try:
with self.subTest(msg=spec.id):
self._run_env(spec.id)
@ -91,7 +91,6 @@ class TestEnvironments(unittest.TestCase):
# Iterate over two trajectories, which should have the same state and action sequence
for spec in ALL_SPECS:
with self.subTest(msg=spec.id):
self._run_env(spec.id)
traj1 = self._run_env(spec.id, seed=seed)
traj2 = self._run_env(spec.id, seed=seed)
for i, time_step in enumerate(zip(*traj1, *traj2)):

107
test/test_metaworld_envs.py Normal file
View File

@ -0,0 +1,107 @@
import unittest
import gym
import numpy as np
from alr_envs import make
from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
# Environment names with everything from the "-goal-observable" marker onwards cut off.
ALL_ENVS = [name.split("-goal-observable")[0] for name in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE]
# Default seed used by `_run_env` when the caller does not pass one.
SEED = 1
class TestEnvironments(unittest.TestCase):
    """Smoke and determinism tests for the Metaworld environments listed in `ALL_ENVS`."""

    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
        """
        Roll out a Metaworld env in the step based setting using random actions.

        Args:
            env_id: Environment name as handed to `make` (the tests pass the entries of `ALL_ENVS`)
            iterations: Number of rollout steps to run. If None, `env.max_path_length` is used,
                or a single step when that attribute is None.
            seed: random seeding
            render: Render the episode

        Returns:
            Tuple `(observations, rewards, dones, actions)` of numpy arrays. `observations` has one
            more entry than the others because the observation following the last step is appended.
        """
        env: gym.Env = make(env_id, seed=seed)
        rewards = []
        observations = []
        actions = []
        dones = []
        # Defined up front so the final check fails with a readable message
        # (instead of a NameError) when no step is executed at all.
        done = False

        # try/finally guarantees the simulator is released even if a verification fails mid-rollout.
        try:
            obs = env.reset()
            self._verify_observations(obs, env.observation_space, "reset()")

            length = env.max_path_length
            if iterations is None:
                iterations = 1 if length is None else length

            # number of samples(multiple environment steps)
            for _ in range(iterations):
                observations.append(obs)

                ac = env.action_space.sample()
                actions.append(ac)
                obs, reward, done, info = env.step(ac)

                self._verify_observations(obs, env.observation_space, "step()")
                self._verify_reward(reward)
                self._verify_done(done)

                rewards.append(reward)
                dones.append(done)

                if render:
                    env.render("human")

                if done:
                    obs = env.reset()

            # unittest assertion instead of a bare `assert`, which is stripped when running with -O.
            self.assertTrue(done, "Done flag is not True after max episode length.")
            observations.append(obs)
        finally:
            env.close()

        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)

    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
        """Check that `obs` lies within `observation_space`; `obs_type` names the producing call."""
        self.assertTrue(observation_space.contains(obs),
                        f"Observation {obs} received from {obs_type} "
                        f"not contained in observation space {observation_space}.")

    def _verify_reward(self, reward):
        """Check that the reward is a float."""
        self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.")

    def _verify_done(self, done):
        """Check that the done flag is a bool."""
        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")

    # NOTE: the `dmc` in the test names is kept so existing test ids stay stable;
    # both tests iterate over the Metaworld ids in ALL_ENVS.
    def test_dmc_functionality(self):
        """Tests that environments runs without errors using random actions."""
        for env_id in ALL_ENVS:
            with self.subTest(msg=env_id):
                self._run_env(env_id)

    def test_dmc_determinism(self):
        """Tests that identical seeds produce identical trajectories."""
        seed = 0
        # Iterate over two trajectories, which should have the same state and action sequence
        for env_id in ALL_ENVS:
            with self.subTest(msg=env_id):
                traj1 = self._run_env(env_id, seed=seed)
                traj2 = self._run_env(env_id, seed=seed)
                for i, time_step in enumerate(zip(*traj1, *traj2)):
                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
                    # The message must be passed as `msg=`: the third positional parameter of
                    # assertAlmostEqual is `places`, so a positional string raises TypeError on mismatch.
                    self.assertAlmostEqual(rwd1, rwd2, msg=f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()