unified API wrapper and updated examples

ottofabian 2021-07-02 13:09:56 +02:00
parent 6607d9cff9
commit 80933eba09
22 changed files with 383 additions and 485 deletions

View File

@ -4,9 +4,8 @@ from gym.envs.registration import register
from alr_envs.classic_control.hole_reacher.hole_reacher_mp_wrapper import HoleReacherMPWrapper from alr_envs.classic_control.hole_reacher.hole_reacher_mp_wrapper import HoleReacherMPWrapper
from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import SimpleReacherMPWrapper from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import SimpleReacherMPWrapper
from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
# Mujoco # Mujoco
@ -204,7 +203,7 @@ register(
"hole_width": None, "hole_width": None,
"hole_depth": 1, "hole_depth": 1,
"hole_x": None, "hole_x": None,
"collision_penalty": 1000, "collision_penalty": 100,
} }
) )
@ -236,7 +235,7 @@ register(
"hole_width": 0.25, "hole_width": 0.25,
"hole_depth": 1, "hole_depth": 1,
"hole_x": 2, "hole_x": 2,
"collision_penalty": 1000, "collision_penalty": 100,
} }
) )
@ -354,7 +353,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
kwargs={ kwargs={
"name": "alr_envs:ALRBallInACupSimple-v0", "name": "alr_envs:ALRBallInACupSimple-v0",
"wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], "wrappers": [BallInACupMPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 3, "num_dof": 3,
"num_basis": 5, "num_basis": 5,
@ -379,7 +378,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
kwargs={ kwargs={
"name": "alr_envs:ALRBallInACup-v0", "name": "alr_envs:ALRBallInACup-v0",
"wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], "wrappers": [BallInACupMPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 7, "num_dof": 7,
"num_basis": 5, "num_basis": 5,
@ -404,7 +403,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={ kwargs={
"name": "alr_envs:ALRBallInACupSimple-v0", "name": "alr_envs:ALRBallInACupSimple-v0",
"wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], "wrappers": [BallInACupMPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 3, "num_dof": 3,
"num_basis": 5, "num_basis": 5,
@ -429,7 +428,7 @@ register(
entry_point='alr_envs.mujoco.ball_in_a_cup.biac_pd:make_detpmp_env_helper', entry_point='alr_envs.mujoco.ball_in_a_cup.biac_pd:make_detpmp_env_helper',
kwargs={ kwargs={
"name": "alr_envs:ALRBallInACupPDSimple-v0", "name": "alr_envs:ALRBallInACupPDSimple-v0",
"wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], "wrappers": [BallInACupMPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 3, "num_dof": 3,
"num_basis": 5, "num_basis": 5,
@ -474,7 +473,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={ kwargs={
"name": "alr_envs:ALRBallInACupSimple-v0", "name": "alr_envs:ALRBallInACupSimple-v0",
"wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], "wrappers": [BallInACupMPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 7, "num_dof": 7,
"num_basis": 5, "num_basis": 5,
@ -486,7 +485,6 @@ register(
"zero_start": True, "zero_start": True,
"zero_goal": True, "zero_goal": True,
"policy_kwargs": { "policy_kwargs": {
"p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]), "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]),
"d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025]) "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025])
} }
@ -499,7 +497,7 @@ register(
entry_point='alr_envs.utils.make_env_helpers:make_contextual_env', entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',
kwargs={ kwargs={
"name": "alr_envs:ALRBallInACupGoal-v0", "name": "alr_envs:ALRBallInACupGoal-v0",
"wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], "wrappers": [BallInACupMPWrapper],
"mp_kwargs": { "mp_kwargs": {
"num_dof": 7, "num_dof": 7,
"num_basis": 5, "num_basis": 5,
@ -522,7 +520,7 @@ register(
## DMC ## DMC
register( register(
id=f'dmc_ball_in_cup_dmp-v0', id=f'dmc_ball_in_cup-catch_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
# max_episode_steps=1, # max_episode_steps=1,
kwargs={ kwargs={
@ -531,19 +529,23 @@ register(
"mp_kwargs": { "mp_kwargs": {
"num_dof": 2, "num_dof": 2,
"num_basis": 5, "num_basis": 5,
"duration": 2, "duration": 20,
"learn_goal": True, "learn_goal": True,
"alpha_phase": 2, "alpha_phase": 2,
"bandwidth_factor": 2, "bandwidth_factor": 2,
"policy_type": "velocity", "policy_type": "motor",
"weights_scale": 50, "weights_scale": 50,
"goal_scale": 0.1 "goal_scale": 0.1,
"policy_kwargs": {
"p_gains": 0.2,
"d_gains": 0.05
}
} }
} }
) )
register( register(
id=f'dmc_ball_in_cup_detpmp-v0', id=f'dmc_ball_in_cup-catch_detpmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
kwargs={ kwargs={
"name": f"ball_in_cup-catch", "name": f"ball_in_cup-catch",
@ -551,16 +553,19 @@ register(
"mp_kwargs": { "mp_kwargs": {
"num_dof": 2, "num_dof": 2,
"num_basis": 5, "num_basis": 5,
"duration": 2, "duration": 20,
"width": 0.025, "width": 0.025,
"policy_type": "velocity", "policy_type": "velocity",
"weights_scale": 0.2, "weights_scale": 0.2,
"zero_start": True "zero_start": True,
"policy_kwargs": {
"p_gains": 0.2,
"d_gains": 0.05
}
} }
} }
) )
# BBO functions # BBO functions
for dim in [5, 10, 25, 50, 100]: for dim in [5, 10, 25, 50, 100]:
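
For reference, a minimal sketch of how the renamed ids above can be rolled out in the step-based gym interface, mirroring the updated example scripts later in this commit (import-time registration of alr_envs is assumed):

    import alr_envs  # assumption: importing the package executes the register() calls above
    from alr_envs.utils.make_env_helpers import make_env

    # One env.step() executes a full DetPMP trajectory (20 s of simulated time).
    env = make_env("dmc_ball_in_cup-catch_detpmp-v0", seed=10)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    print("trajectory return:", reward)
    env.close()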

View File

@ -69,9 +69,17 @@ class HoleReacherEnv(gym.Env):
def dt(self) -> Union[float, int]: def dt(self) -> Union[float, int]:
return self._dt return self._dt
# @property
# def start_pos(self):
# return self._start_pos
@property @property
def start_pos(self): def current_pos(self):
return self._start_pos return self._joint_angles.copy()
@property
def current_vel(self):
return self._angle_velocity.copy()
def step(self, action: np.ndarray): def step(self, action: np.ndarray):
""" """

View File

@ -1,8 +1,8 @@
from typing import Union from typing import Tuple, Union
import numpy as np import numpy as np
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class HoleReacherMPWrapper(MPEnvWrapper): class HoleReacherMPWrapper(MPEnvWrapper):
@ -19,11 +19,15 @@ class HoleReacherMPWrapper(MPEnvWrapper):
]) ])
@property @property
def start_pos(self) -> Union[float, int, np.ndarray]: def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.start_pos return self.env.current_pos
@property @property
def goal_pos(self) -> Union[float, int, np.ndarray]: def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.") raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property @property
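
The same current_pos/current_vel pattern repeats in the SimpleReacher, ViaPointReacher and ball-in-cup wrappers below. A rough sketch of querying the unified interface once the base env is wrapped; treating MPEnvWrapper subclasses as plain gym.Wrapper instances here is an assumption about mp_env_api:

    import gym
    from alr_envs.classic_control.hole_reacher.hole_reacher_mp_wrapper import HoleReacherMPWrapper

    env = HoleReacherMPWrapper(gym.make("alr_envs:HoleReacher-v1"))  # assumption: wrapper takes a gym env directly
    env.reset()
    print(env.current_pos)  # joint angles, forwarded from the underlying env
    print(env.current_vel)  # joint velocities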

View File

@ -59,9 +59,17 @@ class SimpleReacherEnv(gym.Env):
def dt(self) -> Union[float, int]: def dt(self) -> Union[float, int]:
return self._dt return self._dt
# @property
# def start_pos(self):
# return self._start_pos
@property @property
def start_pos(self): def current_pos(self):
return self._start_pos return self._joint_angles
@property
def current_vel(self):
return self._angle_velocity
def step(self, action: np.ndarray): def step(self, action: np.ndarray):
""" """

View File

@ -1,8 +1,8 @@
from typing import Union from typing import Tuple, Union
import numpy as np import numpy as np
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class SimpleReacherMPWrapper(MPEnvWrapper): class SimpleReacherMPWrapper(MPEnvWrapper):
@ -17,11 +17,15 @@ class SimpleReacherMPWrapper(MPEnvWrapper):
]) ])
@property @property
def start_pos(self): def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.start_pos return self.env.current_pos
@property @property
def goal_pos(self): def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.") raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property @property

View File

@ -63,9 +63,17 @@ class ViaPointReacher(gym.Env):
def dt(self): def dt(self):
return self._dt return self._dt
# @property
# def start_pos(self):
# return self._start_pos
@property @property
def start_pos(self): def current_pos(self):
return self._start_pos return self._joint_angles.copy()
@property
def current_vel(self):
return self._angle_velocity.copy()
def step(self, action: np.ndarray): def step(self, action: np.ndarray):
""" """

View File

@ -1,8 +1,8 @@
from typing import Union from typing import Tuple, Union
import numpy as np import numpy as np
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class ViaPointReacherMPWrapper(MPEnvWrapper): class ViaPointReacherMPWrapper(MPEnvWrapper):
@ -18,11 +18,15 @@ class ViaPointReacherMPWrapper(MPEnvWrapper):
]) ])
@property @property
def start_pos(self) -> Union[float, int, np.ndarray]: def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.start_pos return self.env.current_pos
@property @property
def goal_pos(self) -> Union[float, int, np.ndarray]: def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.current_vel
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.") raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property @property

View File

@ -1,27 +0,0 @@
from typing import Union
import numpy as np
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class DMCBallInCupMPWrapper(MPEnvWrapper):
@property
def active_obs(self):
# Besides the ball position, the environment is always set to 0.
return np.hstack([
[False] * 2, # cup position
[True] * 2, # ball position
[False] * 2, # cup velocity
[False] * 2, # ball velocity
])
@property
def start_pos(self) -> Union[float, int, np.ndarray]:
return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']])
@property
def dt(self) -> Union[float, int]:
# Taken from: https://github.com/deepmind/dm_control/blob/master/dm_control/suite/ball_in_cup.py#L27
return 0.02

View File

View File

@ -0,0 +1,34 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class DMCBallInCupMPWrapper(MPEnvWrapper):
@property
def active_obs(self):
# Besides the ball position, the environment is always set to 0.
return np.hstack([
[False] * 2, # cup position
[True] * 2, # ball position
[False] * 2, # cup velocity
[False] * 2, # ball velocity
])
@property
def current_pos(self) -> Union[float, int, np.ndarray]:
return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']])
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return np.hstack([self.physics.named.data.qvel['cup_x'], self.physics.named.data.qvel['cup_z']])
@property
def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]:
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
@property
def dt(self) -> Union[float, int]:
return self.env.dt

View File

@ -1,9 +1,23 @@
from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
def example_dmc(env_name="fish-swim", seed=1, iterations=1000): def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
env = make_env(env_name, seed) """
Example for running a DMC based env in the step based setting.
The env_id has to be specified as `domain_name-task_name` or
for manipulation tasks as `manipulation-environment_name`
Args:
env_id: Either `domain_name-task_name` or `manipulation-environment_name`
seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns:
"""
env = make_env(env_id, seed)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
print("observation shape:", env.observation_space.shape) print("observation shape:", env.observation_space.shape)
@ -15,39 +29,44 @@ def example_dmc(env_name="fish-swim", seed=1, iterations=1000):
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
rewards += reward rewards += reward
env.render("human") if render:
env.render("human")
if done: if done:
print(env_name, rewards) print(env_id, rewards)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
env.close() env.close()
def example_custom_dmc_and_mp(seed=1): def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
""" """
Example for running a custom motion primitive based environment based off of a dmc task. Example for running a custom motion primitive based environment.
Our already registered environments follow the same structure, but do not directly allow for modifications. Our already registered environments follow the same structure.
Hence, this also allows to adjust hyperparameters of the motion primitives more easily. Hence, this also allows to adjust hyperparameters of the motion primitives.
Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
We appreciate PRs for custom environments (especially MP wrappers of existing tasks) We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
for our repo: https://github.com/ALRhub/alr_envs/ for our repo: https://github.com/ALRhub/alr_envs/
Args: Args:
seed: seed seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns: Returns:
""" """
# Base DMC name, according to structure of above example
base_env = "ball_in_cup-catch" base_env = "ball_in_cup-catch"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed. # You can also add other gym.Wrappers in case they are needed.
# wrappers = [HoleReacherMPWrapper]
wrappers = [DMCBallInCupMPWrapper] wrappers = [DMCBallInCupMPWrapper]
mp_kwargs = { mp_kwargs = {
"num_dof": 2, # env.start_pos "num_dof": 2, # env.start_pos
"num_basis": 5, "num_basis": 5,
"duration": 2, "duration": 20,
"learn_goal": True, "learn_goal": True,
"alpha_phase": 2, "alpha_phase": 2,
"bandwidth_factor": 2, "bandwidth_factor": 2,
@ -57,14 +76,21 @@ def example_custom_dmc_and_mp(seed=1):
} }
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP: # OR for a deterministic ProMP:
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, **mp_args) # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args)
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
# Resetting to no rendering can be achieved by render(mode=None).
# It is also possible to change the mode multiple times when
# e.g. only every nth trajectory should be displayed.
if render:
env.render(mode="human")
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
env.render("human")
# number of samples/full trajectories (multiple environment steps) # number of samples/full trajectories (multiple environment steps)
for i in range(10): for i in range(iterations):
ac = env.action_space.sample() ac = env.action_space.sample()
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
rewards += reward rewards += reward
@ -85,14 +111,14 @@ if __name__ == '__main__':
# export MUJOCO_GL="osmesa" # export MUJOCO_GL="osmesa"
# Standard DMC Suite tasks # Standard DMC Suite tasks
example_dmc("fish-swim", seed=10, iterations=100) example_dmc("fish-swim", seed=10, iterations=1000, render=True)
# Manipulation tasks # Manipulation tasks
# The vision versions are currently not integrated # Disclaimer: The vision versions are currently not integrated and yield an error
example_dmc("manipulation-reach_site_features", seed=10, iterations=100) example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=True)
# Gym + DMC hybrid task provided in the MP framework # Gym + DMC hybrid task provided in the MP framework
example_dmc("dmc_ball_in_cup_dmp-v0", seed=10, iterations=10) example_dmc("dmc_ball_in_cup-catch_detpmp-v0", seed=10, iterations=1, render=True)
# Custom DMC task # Custom DMC task
example_custom_dmc_and_mp() example_custom_dmc_and_mp(seed=10, iterations=1, render=True)

View File

@ -4,14 +4,23 @@ from collections import defaultdict
import gym import gym
import numpy as np import numpy as np
from alr_envs.utils.make_env_helpers import make_env from alr_envs.utils.make_env_helpers import make_env, make_env_rank
from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist
def example_general(env_id: str, seed=1, iterations=1000): def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True):
""" """
Example for running any env in the step based setting. Example for running any env in the step based setting.
This also includes DMC environments when leveraging our custom make_env function. This also includes DMC environments when leveraging our custom make_env function.
Args:
env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns:
""" """
env = make_env(env_id, seed) env = make_env(env_id, seed)
@ -25,7 +34,7 @@ def example_general(env_id: str, seed=1, iterations=1000):
obs, reward, done, info = env.step(env.action_space.sample()) obs, reward, done, info = env.step(env.action_space.sample())
rewards += reward rewards += reward
if i % 1 == 0: if render:
env.render() env.render()
if done: if done:
@ -34,36 +43,60 @@ def example_general(env_id: str, seed=1, iterations=1000):
obs = env.reset() obs = env.reset()
def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)): def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800):
def sample(env: gym.vector.VectorEnv, n_samples=100): """
# for plotting Example for running any env in a vectorized multiprocessing setting to generate more samples faster.
rewards = np.zeros(n_cpu) This also includes DMC and DMP environments when leveraging our custom make_env function.
Be aware, increasing the number of environments reduces the total length of the individual episodes.
# this would generate more samples than requested if n_samples % num_envs != 0 Args:
repeat = int(np.ceil(n_samples / env.num_envs)) env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks
vals = defaultdict(list) seed: seed for deterministic behaviour
for i in range(repeat): n_cpu: Number of cpus cores to use in parallel
obs, reward, done, info = envs.step(envs.action_space.sample()) n_samples: number of samples generated in total by all environments.
vals['obs'].append(obs)
vals['reward'].append(reward)
vals['done'].append(done)
vals['info'].append(info)
rewards += reward
if np.any(done):
print(rewards[done])
rewards[done] = 0
# do not return values above threshold Returns: Tuple of (obs, reward, done, info) with type np.ndarray
return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),)
from alr_envs.utils.make_env_helpers import make_env_rank """
envs = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)]) env = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)])
# OR
# envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)]) # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)])
obs = envs.reset() # for plotting
print(sample(envs, 16)) rewards = np.zeros(n_cpu)
buffer = defaultdict(list)
obs = env.reset()
# this would generate more samples than requested if n_samples % num_envs != 0
repeat = int(np.ceil(n_samples / env.num_envs))
for i in range(repeat):
obs, reward, done, info = env.step(env.action_space.sample())
buffer['obs'].append(obs)
buffer['reward'].append(reward)
buffer['done'].append(done)
buffer['info'].append(info)
rewards += reward
if np.any(done):
print(f"Reward at iteration {i}: {rewards[done]}")
rewards[done] = 0
# do not return values above threshold
return *map(lambda v: np.stack(v)[:n_samples], buffer.values()),
if __name__ == '__main__': if __name__ == '__main__':
# Mujoco task from framework # Basic gym task
example_general("alr_envs:ALRReacher-v0") # example_general("Pendulum-v0", seed=10, iterations=200, render=True)
#
# # Basis task from framework
# example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True)
#
# # OpenAI Mujoco task
# example_general("HalfCheetah-v2", seed=10, render=True)
#
# # Mujoco task from framework
# example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=True)
# Vectorized multiprocessing environments
example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200)
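
Since example_async now returns the stacked buffers instead of printing them, a caller could unpack them roughly like this (the exact shapes depend on the observation space and are an assumption):

    obs, rewards, dones, infos = example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, n_samples=400)
    print(obs.shape)      # approximately (n_samples / n_cpu, n_cpu, obs_dim)
    print(rewards.shape)  # approximately (n_samples / n_cpu, n_cpu)
    print(int(dones.sum()), "episodes finished across the workers")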

View File

@ -2,12 +2,14 @@ from alr_envs import HoleReacherMPWrapper
from alr_envs.utils.make_env_helpers import make_dmp_env, make_env from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1): def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
""" """
Example for running a motion primitive based environment, which is already registered Example for running a motion primitive based environment, which is already registered
Args: Args:
env_name: DMP env_id env_name: DMP env_id
seed: seed seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns: Returns:
@ -16,44 +18,81 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1):
# First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface. # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
env = make_env(env_name, seed) env = make_env(env_name, seed)
# Changing the mp_kwargs is possible by providing them to gym.
# E.g. here by providing way to many basis functions
# mp_kwargs = {
# "num_dof": 5,
# "num_basis": 1000,
# "duration": 2,
# "learn_goal": True,
# "alpha_phase": 2,
# "bandwidth_factor": 2,
# "policy_type": "velocity",
# "weights_scale": 50,
# "goal_scale": 0.1
# }
# env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
rewards = 0 rewards = 0
# env.render(mode=None) # env.render(mode=None)
obs = env.reset() obs = env.reset()
# number of samples/full trajectories (multiple environment steps) # number of samples/full trajectories (multiple environment steps)
for i in range(10): for i in range(iterations):
if render and i % 2 == 0:
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
# Resetting to no rendering can be achieved by render(mode=None).
# It is also possible to change the mode multiple times when
# e.g. only every second trajectory should be displayed, such as here
# Just make sure the correct mode is set before executing the step.
env.render(mode="human")
else:
env.render(mode=None)
ac = env.action_space.sample() ac = env.action_space.sample()
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
rewards += reward rewards += reward
if i % 1 == 0:
# render full DMP trajectory
# render can only be called once in the beginning as well. That would render every trajectory
# Calling it after every trajectory allows to modify the mode. mode=None, disables rendering.
env.render(mode="human")
if done: if done:
print(rewards) print(rewards)
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
def example_custom_mp(seed=1): def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True):
"""
Example for running a motion primitive based environment, which is already registered
Args:
env_name: DMP env_id
seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns:
"""
# Changing the mp_kwargs is possible by providing them to gym.
# E.g. here by providing way to many basis functions
mp_kwargs = {
"num_dof": 5,
"num_basis": 1000,
"duration": 2,
"learn_goal": True,
"alpha_phase": 2,
"bandwidth_factor": 2,
"policy_type": "velocity",
"weights_scale": 50,
"goal_scale": 0.1
}
env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
# This time rendering every trajectory
if render:
env.render(mode="human")
rewards = 0
obs = env.reset()
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
ac = env.action_space.sample()
obs, reward, done, info = env.step(ac)
rewards += reward
if done:
print(rewards)
rewards = 0
obs = env.reset()
def example_fully_custom_mp(seed=1, iterations=1, render=True):
""" """
Example for running a custom motion primitive based environment. Example for running a custom motion primitive based environment.
Our already registered environments follow the same structure. Our already registered environments follow the same structure.
@ -63,12 +102,15 @@ def example_custom_mp(seed=1):
for our repo: https://github.com/ALRhub/alr_envs/ for our repo: https://github.com/ALRhub/alr_envs/
Args: Args:
seed: seed seed: seed
iterations: Number of rollout steps to run
render: Render the episode
Returns: Returns:
""" """
base_env = "alr_envs:HoleReacher-v1" base_env = "alr_envs:HoleReacher-v1"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed. # You can also add other gym.Wrappers in case they are needed.
wrappers = [HoleReacherMPWrapper] wrappers = [HoleReacherMPWrapper]
@ -85,19 +127,16 @@ def example_custom_mp(seed=1):
} }
env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
# OR for a deterministic ProMP: # OR for a deterministic ProMP:
# env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed) # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
if render:
env.render(mode="human")
rewards = 0 rewards = 0
# render full DMP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
# Resetting to no rendering, can be achieved by render(mode=None).
# It is also possible to change them mode multiple times when
# e.g. only every nth trajectory should be displayed.
env.render(mode="human")
obs = env.reset() obs = env.reset()
# number of samples/full trajectories (multiple environment steps) # number of samples/full trajectories (multiple environment steps)
for i in range(10): for i in range(iterations):
ac = env.action_space.sample() ac = env.action_space.sample()
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
rewards += reward rewards += reward
@ -110,10 +149,13 @@ def example_custom_mp(seed=1):
if __name__ == '__main__': if __name__ == '__main__':
# DMP # DMP
example_mp("alr_envs:HoleReacherDMP-v1") example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=True)
# DetProMP # DetProMP
example_mp("alr_envs:HoleReacherDetPMP-v1") example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=True)
# Custom DMP # Altered basis functions
example_custom_mp() example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=True)
# Custom MP
example_fully_custom_mp(seed=10, iterations=1, render=True)
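
The commented-out make_detpmp_env alternative in example_fully_custom_mp could be filled in along these lines; the kwargs mirror the DetPMP registrations at the top of this commit, and the import location of make_detpmp_env is assumed to match make_dmp_env:

    from alr_envs import HoleReacherMPWrapper
    from alr_envs.utils.make_env_helpers import make_detpmp_env  # assumption: exported next to make_dmp_env

    mp_kwargs = {
        "num_dof": 5,
        "num_basis": 5,
        "duration": 2,
        "width": 0.025,
        "policy_type": "velocity",
        "weights_scale": 0.2,
        "zero_start": True,
    }
    env = make_detpmp_env("alr_envs:HoleReacher-v1", wrappers=[HoleReacherMPWrapper], seed=1, mp_kwargs=mp_kwargs)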

View File

@ -1,270 +0,0 @@
from collections import OrderedDict
import os
from abc import abstractmethod
from gym import error, spaces
from gym.utils import seeding
import numpy as np
from os import path
from alr_envs.utils.mps.alr_env import AlrEnv
from alr_envs.utils.positional_env import PositionalEnv
try:
import mujoco_py
except ImportError as e:
raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
DEFAULT_SIZE = 500
def convert_observation_to_space(observation):
if isinstance(observation, dict):
space = spaces.Dict(OrderedDict([
(key, convert_observation_to_space(value))
for key, value in observation.items()
]))
elif isinstance(observation, np.ndarray):
low = np.full(observation.shape, -float('inf'), dtype=np.float32)
high = np.full(observation.shape, float('inf'), dtype=np.float32)
space = spaces.Box(low, high, dtype=observation.dtype)
else:
raise NotImplementedError(type(observation), observation)
return space
class AlrMujocoEnv(PositionalEnv, AlrEnv):
"""
Superclass for all MuJoCo environments.
"""
def __init__(self, model_path, n_substeps, apply_gravity_comp=True):
"""
Args:
model_path: path to xml file
n_substeps: how many steps mujoco does per call to env.step
apply_gravity_comp: Whether gravity compensation should be active
"""
if model_path.startswith("/"):
fullpath = model_path
else:
fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
if not path.exists(fullpath):
raise IOError("File %s does not exist" % fullpath)
self.n_substeps = n_substeps
self.apply_gravity_comp = apply_gravity_comp
self.model = mujoco_py.load_model_from_path(fullpath)
self.sim = mujoco_py.MjSim(self.model, nsubsteps=n_substeps)
self.data = self.sim.data
self.viewer = None
self._viewers = {}
self.metadata = {
'render.modes': ['human', 'rgb_array', 'depth_array'],
'video.frames_per_second': int(np.round(1.0 / self.dt))
}
self.init_qpos = self.sim.data.qpos.ravel().copy()
self.init_qvel = self.sim.data.qvel.ravel().copy()
self._start_pos = None
self._start_vel = None
self._set_action_space()
observation = self._get_obs() # TODO: is calling get_obs enough? should we call reset, or even step?
self._set_observation_space(observation)
self.seed()
@property
def current_pos(self):
"""
By default returns the joint positions of all simulated objects. May be overridden in subclass.
"""
return self.sim.data.qpos
@property
def current_vel(self):
"""
By default returns the joint velocities of all simulated objects. May be overridden in subclass.
"""
return self.sim.data.qvel
@property
def start_pos(self):
"""
Start position of the agent, for example joint angles of a Panda robot. Necessary for MP wrapped simple_reacher.
"""
return self._start_pos
@property
def start_vel(self):
"""
Start velocity of the agent. Necessary for MP wrapped simple_reacher.
"""
return self._start_vel
def extend_des_pos(self, des_pos):
"""
In a simplified environment, the actions may only control a subset of all the joints.
Extend the trajectory to match the environments full action space
Args:
des_pos:
Returns:
"""
pass
def extend_des_vel(self, des_vel):
pass
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
def _set_observation_space(self, observation):
self.observation_space = convert_observation_to_space(observation)
return self.observation_space
def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
return [seed]
# methods to override:
# ----------------------------
@property
@abstractmethod
def active_obs(self):
"""Returns boolean mask for each observation entry
whether the observation is returned for the contextual case or not.
This effectively allows to filter unwanted or unnecessary observations from the full step-based case.
"""
return np.ones(self.observation_space.shape, dtype=bool)
def _get_obs(self):
"""Returns the observation.
"""
raise NotImplementedError()
def reset_model(self):
"""
Reset the robot degrees of freedom (qpos and qvel).
Implement this in each subclass.
"""
raise NotImplementedError
def viewer_setup(self):
"""
This method is called when the viewer is initialized.
Optionally implement this method, if you need to tinker with camera position
and so forth.
"""
pass
# -----------------------------
def reset(self):
self.sim.reset()
ob = self.reset_model()
return ob
def set_state(self, qpos, qvel):
assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
old_state = self.sim.get_state()
new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
old_state.act, old_state.udd_state)
self.sim.set_state(new_state)
self.sim.forward()
@property
def dt(self):
return self.model.opt.timestep * self.n_substeps
def do_simulation(self, ctrl):
"""
Additionally returns whether there was an error while stepping the simulation
"""
error_in_sim = False
num_actuations = len(ctrl)
if self.apply_gravity_comp:
self.sim.data.ctrl[:num_actuations] = ctrl + self.sim.data.qfrc_bias[:num_actuations].copy() / self.model.actuator_gear[:, 0]
else:
self.sim.data.ctrl[:num_actuations] = ctrl
try:
self.sim.step()
except mujoco_py.builder.MujocoException:
error_in_sim = True
return error_in_sim
def render(self,
mode='human',
width=DEFAULT_SIZE,
height=DEFAULT_SIZE,
camera_id=None,
camera_name=None):
if mode == 'rgb_array' or mode == 'depth_array':
if camera_id is not None and camera_name is not None:
raise ValueError("Both `camera_id` and `camera_name` cannot be"
" specified at the same time.")
no_camera_specified = camera_name is None and camera_id is None
if no_camera_specified:
camera_name = 'track'
if camera_id is None and camera_name in self.model._camera_name2id:
camera_id = self.model.camera_name2id(camera_name)
self._get_viewer(mode).render(width, height, camera_id=camera_id)
if mode == 'rgb_array':
# window size used for old mujoco-py:
data = self._get_viewer(mode).read_pixels(width, height, depth=False)
# original image is upside-down, so flip it
return data[::-1, :, :]
elif mode == 'depth_array':
self._get_viewer(mode).render(width, height)
# window size used for old mujoco-py:
# Extract depth part of the read_pixels() tuple
data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1]
# original image is upside-down, so flip it
return data[::-1, :]
elif mode == 'human':
self._get_viewer(mode).render()
def close(self):
if self.viewer is not None:
# self.viewer.finish()
self.viewer = None
self._viewers = {}
def _get_viewer(self, mode):
self.viewer = self._viewers.get(mode)
if self.viewer is None:
if mode == 'human':
self.viewer = mujoco_py.MjViewer(self.sim)
elif mode == 'rgb_array' or mode == 'depth_array':
self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
self.viewer_setup()
self._viewers[mode] = self.viewer
return self.viewer
def get_body_com(self, body_name):
return self.data.get_body_xpos(body_name)
def state_vector(self):
return np.concatenate([
self.sim.data.qpos.flat,
self.sim.data.qvel.flat
])

View File

@ -1,10 +1,11 @@
from gym import utils from gym import utils
import os import os
import numpy as np import numpy as np
from alr_envs.mujoco import alr_mujoco_env from gym.envs.mujoco import MujocoEnv
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False, def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
reward_type: str = None, context: np.ndarray = None): reward_type: str = None, context: np.ndarray = None):
utils.EzPickle.__init__(**locals()) utils.EzPickle.__init__(**locals())

View File

@ -1,8 +1,8 @@
from typing import Union from typing import Tuple, Union
import numpy as np import numpy as np
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
class BallInACupMPWrapper(MPEnvWrapper): class BallInACupMPWrapper(MPEnvWrapper):
@ -24,6 +24,14 @@ class BallInACupMPWrapper(MPEnvWrapper):
else: else:
return self._start_pos return self._start_pos
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qpos[0:7].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qvel[0:7].copy()
@property @property
def goal_pos(self): def goal_pos(self):
# TODO: @Max I think the default value of returning to the start is reasonable here # TODO: @Max I think the default value of returning to the start is reasonable here

View File

@ -1,15 +0,0 @@
from typing import Tuple, Union
import numpy as np
from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
class BallInACupPositionalWrapper(PositionalEnvWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qpos[0:7].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.sim.data.qvel[0:7].copy()

View File

@ -1,11 +1,15 @@
from gym import utils
import os import os
import numpy as np import numpy as np
from alr_envs.mujoco import alr_mujoco_env from gym import utils
from gym.envs.mujoco import MujocoEnv
class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None):
utils.EzPickle.__init__(**locals())
MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip)
self._steps = 0 self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@ -25,12 +29,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
self.context = None self.context = None
# alr_mujoco_env.AlrMujocoEnv.__init__(self,
utils.EzPickle.__init__(self) # self.xml_path,
alr_mujoco_env.AlrMujocoEnv.__init__(self, # apply_gravity_comp=apply_gravity_comp,
self.xml_path, # n_substeps=n_substeps)
apply_gravity_comp=apply_gravity_comp,
n_substeps=n_substeps)
self.sim_time = 8 # seconds self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / self.dt) self.sim_steps = int(self.sim_time / self.dt)

View File

@ -1,11 +1,13 @@
from gym import utils from gym import utils
import os import os
import numpy as np import numpy as np
from alr_envs.mujoco import alr_mujoco_env from gym.envs.mujoco import MujocoEnv
class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): class ALRBeerpongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
utils.EzPickle.__init__(**locals())
self._steps = 0 self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@ -26,11 +28,12 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
self.context = None self.context = None
utils.EzPickle.__init__(self) MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps)
alr_mujoco_env.AlrMujocoEnv.__init__(self,
self.xml_path, # alr_mujoco_env.AlrMujocoEnv.__init__(self,
apply_gravity_comp=apply_gravity_comp, # self.xml_path,
n_substeps=n_substeps) # apply_gravity_comp=apply_gravity_comp,
# n_substeps=n_substeps)
self.sim_time = 8 # seconds self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / self.dt) self.sim_steps = int(self.sim_time / self.dt)

View File

@ -1,22 +1,24 @@
import collections
import re import re
from typing import Union
import gym import gym
from gym.envs.registration import register from gym.envs.registration import register
def make( def make(
id, id: str,
seed=1, seed: int = 1,
visualize_reward=True, visualize_reward: bool = True,
from_pixels=False, from_pixels: bool = False,
height=84, height: int = 84,
width=84, width: int = 84,
camera_id=0, camera_id: int = 0,
frame_skip=1, frame_skip: int = 1,
episode_length=1000, episode_length: Union[None, int] = None,
environment_kwargs=None, environment_kwargs: dict = {},
time_limit=None, time_limit: Union[None, float] = None,
channels_first=True channels_first: bool = True
): ):
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py
# License: MIT # License: MIT
@ -31,12 +33,16 @@ def make(
assert not visualize_reward, 'cannot use visualize reward when learning from pixels' assert not visualize_reward, 'cannot use visualize reward when learning from pixels'
# shorten episode length # shorten episode length
if episode_length is None:
# Default lengths for benchmarking suite is 1000 and for manipulation tasks 250
episode_length = 250 if domain_name == "manipulation" else 1000
max_episode_steps = (episode_length + frame_skip - 1) // frame_skip max_episode_steps = (episode_length + frame_skip - 1) // frame_skip
if env_id not in gym.envs.registry.env_specs: if env_id not in gym.envs.registry.env_specs:
task_kwargs = {} task_kwargs = {'random': seed}
# if seed is not None: # if seed is not None:
task_kwargs['random'] = seed # task_kwargs['random'] = seed
if time_limit is not None: if time_limit is not None:
task_kwargs['time_limit'] = time_limit task_kwargs['time_limit'] = time_limit
register( register(
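
With episode_length now defaulting to None, the helper falls back to the benchmark defaults noted in the comment above (1000 steps for suite tasks, 250 for manipulation). A sketch of two calls, leaving out the import since this make() lives in the module shown here:

    # assumption: `make` refers to the helper defined in this file
    env = make("ball_in_cup-catch", seed=1)                 # suite task -> episode_length defaults to 1000
    env = make("manipulation-reach_site_features", seed=1)  # manipulation task -> defaults to 250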

View File

@ -1,6 +1,7 @@
# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py
# License: MIT # License: MIT
# Copyright (c) 2020 Denis Yarats # Copyright (c) 2020 Denis Yarats
import collections
from typing import Any, Dict, Tuple from typing import Any, Dict, Tuple
import numpy as np import numpy as np
@ -31,12 +32,21 @@ def _spec_to_box(spec):
return spaces.Box(low, high, dtype=np.float32) return spaces.Box(low, high, dtype=np.float32)
def _flatten_obs(obs): def _flatten_obs(obs: collections.MutableMapping):
obs_pieces = [] # obs_pieces = []
for v in obs.values(): # for v in obs.values():
flat = np.array([v]) if np.isscalar(v) else v.ravel() # flat = np.array([v]) if np.isscalar(v) else v.ravel()
obs_pieces.append(flat) # obs_pieces.append(flat)
return np.concatenate(obs_pieces, axis=0) # return np.concatenate(obs_pieces, axis=0)
if not isinstance(obs, collections.MutableMapping):
raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.')
# Keep key order consistent for non OrderedDicts
keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys())
obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys]
return np.concatenate(obs_vals)
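
A quick illustration of the new key handling in _flatten_obs: plain dicts are flattened in sorted key order, while an OrderedDict keeps its insertion order (the values below are made up):

    import collections
    import numpy as np

    plain = {"velocity": np.array([3., 4.]), "position": np.array([1., 2.])}
    ordered = collections.OrderedDict([("velocity", np.array([3., 4.])), ("position", np.array([1., 2.]))])

    print(_flatten_obs(plain))    # [1. 2. 3. 4.] -> keys sorted, position first
    print(_flatten_obs(ordered))  # [3. 4. 1. 2.] -> insertion order kept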
class DMCWrapper(core.Env): class DMCWrapper(core.Env):
@ -75,7 +85,7 @@ class DMCWrapper(core.Env):
self._action_space = _spec_to_box([self._env.action_spec()]) self._action_space = _spec_to_box([self._env.action_spec()])
self._observation_space = _spec_to_box(self._env.observation_spec().values()) self._observation_space = _spec_to_box(self._env.observation_spec().values())
self._last_observation = None self._last_state = None
self.viewer = None self.viewer = None
# set seed # set seed
@ -107,6 +117,10 @@ class DMCWrapper(core.Env):
def action_space(self): def action_space(self):
return self._action_space return self._action_space
@property
def dt(self):
return self._env.control_timestep() * self._frame_skip
def seed(self, seed=None): def seed(self, seed=None):
self._action_space.seed(seed) self._action_space.seed(seed)
self._observation_space.seed(seed) self._observation_space.seed(seed)
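
The new dt property also explains the duration change from 2 to 20 in the registrations above: assuming dm_control's ball_in_cup control timestep of 0.02 s (previously hard-coded in the deleted wrapper) and frame_skip=1, a 20 s motion primitive spans 1000 control steps, i.e. a full default-length episode.

    control_timestep = 0.02              # assumption: dm_control ball_in_cup default
    frame_skip = 1
    dt = control_timestep * frame_skip
    steps_per_trajectory = int(20 / dt)  # 20 s duration -> 1000 env steps
    print(dt, steps_per_trajectory)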
@ -123,19 +137,19 @@ class DMCWrapper(core.Env):
if done: if done:
break break
self._last_observation = _flatten_obs(time_step.observation) self._last_state = _flatten_obs(time_step.observation)
obs = self._get_obs(time_step) obs = self._get_obs(time_step)
extra['discount'] = time_step.discount extra['discount'] = time_step.discount
return obs, reward, done, extra return obs, reward, done, extra
def reset(self) -> np.ndarray: def reset(self) -> np.ndarray:
time_step = self._env.reset() time_step = self._env.reset()
self._last_observation = _flatten_obs(time_step.observation) self._last_state = _flatten_obs(time_step.observation)
obs = self._get_obs(time_step) obs = self._get_obs(time_step)
return obs return obs
def render(self, mode='rgb_array', height=None, width=None, camera_id=0): def render(self, mode='rgb_array', height=None, width=None, camera_id=0):
if self._last_observation is None: if self._last_state is None:
raise ValueError('Environment not ready to render. Call reset() first.') raise ValueError('Environment not ready to render. Call reset() first.')
# assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode

View File

@ -3,7 +3,7 @@ from typing import Iterable, List, Type
import gym import gym
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper
from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
@ -32,7 +32,7 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0):
def make_env(env_id: str, seed, **kwargs): def make_env(env_id: str, seed, **kwargs):
""" """
Converts an env_id to an environment with the gym API. Converts an env_id to an environment with the gym API.
This also works for DeepMind Control Suite env_wrappers This also works for DeepMind Control Suite interface_wrappers
for which domain name and task name are expected to be separated by "-". for which domain name and task name are expected to be separated by "-".
Args: Args:
env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks
@ -42,7 +42,7 @@ def make_env(env_id: str, seed, **kwargs):
""" """
try: try:
# Add seed to kwargs in case it is a predefined dmc environment. # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment.
if env_id.startswith("dmc"): if env_id.startswith("dmc"):
kwargs.update({"seed": seed}) kwargs.update({"seed": seed})
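
Putting the helper together, both id styles route through the same make_env entry point; a short sketch using ids that appear elsewhere in this commit:

    from alr_envs.utils.make_env_helpers import make_env

    # Raw DMC task, created on the fly from "domain_name-task_name"
    step_env = make_env("ball_in_cup-catch", seed=1)

    # Pre-registered gym+DMC hybrid MP task; the "dmc" prefix makes make_env forward the seed as a kwarg
    mp_env = make_env("dmc_ball_in_cup-catch_detpmp-v0", seed=1)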