diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index b1056a2..a526ff7 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -4,9 +4,8 @@ from gym.envs.registration import register from alr_envs.classic_control.hole_reacher.hole_reacher_mp_wrapper import HoleReacherMPWrapper from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import SimpleReacherMPWrapper from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper -from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper +from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper -from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock # Mujoco @@ -204,7 +203,7 @@ register( "hole_width": None, "hole_depth": 1, "hole_x": None, - "collision_penalty": 1000, + "collision_penalty": 100, } ) @@ -236,7 +235,7 @@ register( "hole_width": 0.25, "hole_depth": 1, "hole_x": 2, - "collision_penalty": 1000, + "collision_penalty": 100, } ) @@ -354,7 +353,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', kwargs={ "name": "alr_envs:ALRBallInACupSimple-v0", - "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], + "wrappers": [BallInACupMPWrapper], "mp_kwargs": { "num_dof": 3, "num_basis": 5, @@ -379,7 +378,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', kwargs={ "name": "alr_envs:ALRBallInACup-v0", - "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], + "wrappers": [BallInACupMPWrapper], "mp_kwargs": { "num_dof": 7, "num_basis": 5, @@ -404,7 +403,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "alr_envs:ALRBallInACupSimple-v0", - "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], + "wrappers": [BallInACupMPWrapper], "mp_kwargs": { "num_dof": 3, "num_basis": 5, @@ -429,7 +428,7 @@ register( entry_point='alr_envs.mujoco.ball_in_a_cup.biac_pd:make_detpmp_env_helper', kwargs={ "name": "alr_envs:ALRBallInACupPDSimple-v0", - "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], + "wrappers": [BallInACupMPWrapper], "mp_kwargs": { "num_dof": 3, "num_basis": 5, @@ -474,7 +473,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": "alr_envs:ALRBallInACupSimple-v0", - "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], + "wrappers": [BallInACupMPWrapper], "mp_kwargs": { "num_dof": 7, "num_basis": 5, @@ -486,7 +485,6 @@ register( "zero_start": True, "zero_goal": True, "policy_kwargs": { - "p_gains": np.array([4. / 3., 2.4, 2.5, 5. / 3., 2., 2., 1.25]), "d_gains": np.array([0.0466, 0.12, 0.125, 0.04166, 0.06, 0.06, 0.025]) } @@ -499,7 +497,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_contextual_env', kwargs={ "name": "alr_envs:ALRBallInACupGoal-v0", - "wrappers": [BallInACupMPWrapper, BallInACupPositionalWrapper], + "wrappers": [BallInACupMPWrapper], "mp_kwargs": { "num_dof": 7, "num_basis": 5, @@ -522,7 +520,7 @@ register( ## DMC register( - id=f'dmc_ball_in_cup_dmp-v0', + id=f'dmc_ball_in_cup-catch_dmp-v0', entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', # max_episode_steps=1, kwargs={ @@ -531,19 +529,23 @@ register( "mp_kwargs": { "num_dof": 2, "num_basis": 5, - "duration": 2, + "duration": 20, "learn_goal": True, "alpha_phase": 2, "bandwidth_factor": 2, - "policy_type": "velocity", + "policy_type": "motor", "weights_scale": 50, - "goal_scale": 0.1 + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 0.2, + "d_gains": 0.05 + } } } ) register( - id=f'dmc_ball_in_cup_detpmp-v0', + id=f'dmc_ball_in_cup-catch_detpmp-v0', entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', kwargs={ "name": f"ball_in_cup-catch", @@ -551,16 +553,19 @@ register( "mp_kwargs": { "num_dof": 2, "num_basis": 5, - "duration": 2, + "duration": 20, "width": 0.025, "policy_type": "velocity", "weights_scale": 0.2, - "zero_start": True + "zero_start": True, + "policy_kwargs": { + "p_gains": 0.2, + "d_gains": 0.05 + } } } ) - # BBO functions for dim in [5, 10, 25, 50, 100]: diff --git a/alr_envs/classic_control/hole_reacher/hole_reacher.py b/alr_envs/classic_control/hole_reacher/hole_reacher.py index dfa2599..a09dab1 100644 --- a/alr_envs/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/classic_control/hole_reacher/hole_reacher.py @@ -69,9 +69,17 @@ class HoleReacherEnv(gym.Env): def dt(self) -> Union[float, int]: return self._dt + # @property + # def start_pos(self): + # return self._start_pos + @property - def start_pos(self): - return self._start_pos + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() def step(self, action: np.ndarray): """ diff --git a/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py b/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py index a2fd2a7..12b5d19 100644 --- a/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py +++ b/alr_envs/classic_control/hole_reacher/hole_reacher_mp_wrapper.py @@ -1,8 +1,8 @@ -from typing import Union +from typing import Tuple, Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class HoleReacherMPWrapper(MPEnvWrapper): @@ -19,11 +19,15 @@ class HoleReacherMPWrapper(MPEnvWrapper): ]) @property - def start_pos(self) -> Union[float, int, np.ndarray]: - return self.env.start_pos + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_pos @property - def goal_pos(self) -> Union[float, int, np.ndarray]: + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_vel + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: raise ValueError("Goal position is not available and has to be learnt based on the environment.") @property diff --git a/alr_envs/classic_control/simple_reacher/simple_reacher.py b/alr_envs/classic_control/simple_reacher/simple_reacher.py index 157913f..08c151f 100644 --- a/alr_envs/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/classic_control/simple_reacher/simple_reacher.py @@ -59,9 +59,17 @@ class SimpleReacherEnv(gym.Env): def dt(self) -> Union[float, int]: return self._dt + # @property + # def start_pos(self): + # return self._start_pos + @property - def start_pos(self): - return self._start_pos + def current_pos(self): + return self._joint_angles + + @property + def current_vel(self): + return self._angle_velocity def step(self, action: np.ndarray): """ diff --git a/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py b/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py index ee96fc6..40426cf 100644 --- a/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py +++ b/alr_envs/classic_control/simple_reacher/simple_reacher_mp_wrapper.py @@ -1,8 +1,8 @@ -from typing import Union +from typing import Tuple, Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class SimpleReacherMPWrapper(MPEnvWrapper): @@ -17,11 +17,15 @@ class SimpleReacherMPWrapper(MPEnvWrapper): ]) @property - def start_pos(self): - return self.env.start_pos + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_pos @property - def goal_pos(self): + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_vel + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: raise ValueError("Goal position is not available and has to be learnt based on the environment.") @property diff --git a/alr_envs/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/classic_control/viapoint_reacher/viapoint_reacher.py index 0257da0..99f8bcf 100644 --- a/alr_envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -63,9 +63,17 @@ class ViaPointReacher(gym.Env): def dt(self): return self._dt + # @property + # def start_pos(self): + # return self._start_pos + @property - def start_pos(self): - return self._start_pos + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() def step(self, action: np.ndarray): """ diff --git a/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py b/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py index 9764d4d..a4a6ba3 100644 --- a/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py +++ b/alr_envs/classic_control/viapoint_reacher/viapoint_reacher_mp_wrapper.py @@ -1,8 +1,8 @@ -from typing import Union +from typing import Tuple, Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class ViaPointReacherMPWrapper(MPEnvWrapper): @@ -18,11 +18,15 @@ class ViaPointReacherMPWrapper(MPEnvWrapper): ]) @property - def start_pos(self) -> Union[float, int, np.ndarray]: - return self.env.start_pos + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_pos @property - def goal_pos(self) -> Union[float, int, np.ndarray]: + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_vel + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: raise ValueError("Goal position is not available and has to be learnt based on the environment.") @property diff --git a/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py b/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py deleted file mode 100644 index 97959d7..0000000 --- a/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Union - -import numpy as np - -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper - - -class DMCBallInCupMPWrapper(MPEnvWrapper): - - @property - def active_obs(self): - # Besides the ball position, the environment is always set to 0. - return np.hstack([ - [False] * 2, # cup position - [True] * 2, # ball position - [False] * 2, # cup velocity - [False] * 2, # ball velocity - ]) - - @property - def start_pos(self) -> Union[float, int, np.ndarray]: - return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']]) - - @property - def dt(self) -> Union[float, int]: - # Taken from: https://github.com/deepmind/dm_control/blob/master/dm_control/suite/ball_in_cup.py#L27 - return 0.02 diff --git a/alr_envs/dmc/ball_in_cup/__init__.py b/alr_envs/dmc/ball_in_cup/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/alr_envs/dmc/ball_in_cup/ball_in_the_cup_mp_wrapper.py b/alr_envs/dmc/ball_in_cup/ball_in_the_cup_mp_wrapper.py new file mode 100644 index 0000000..539f4e9 --- /dev/null +++ b/alr_envs/dmc/ball_in_cup/ball_in_the_cup_mp_wrapper.py @@ -0,0 +1,34 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper + + +class DMCBallInCupMPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # Besides the ball position, the environment is always set to 0. + return np.hstack([ + [False] * 2, # cup position + [True] * 2, # ball position + [False] * 2, # cup velocity + [False] * 2, # ball velocity + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.hstack([self.physics.named.data.qpos['cup_x'], self.physics.named.data.qpos['cup_z']]) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return np.hstack([self.physics.named.data.qvel['cup_x'], self.physics.named.data.qvel['cup_z']]) + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py index 433b12c..d99b037 100644 --- a/alr_envs/examples/examples_dmc.py +++ b/alr_envs/examples/examples_dmc.py @@ -1,9 +1,23 @@ -from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper +from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.utils.make_env_helpers import make_dmp_env, make_env -def example_dmc(env_name="fish-swim", seed=1, iterations=1000): - env = make_env(env_name, seed) +def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): + """ + Example for running a DMC based env in the step based setting. + The env_id has to be specified as `domain_name-task_name` or + for manipulation tasks as `manipulation-environment_name` + + Args: + env_id: Either `domain_name-task_name` or `manipulation-environment_name` + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + env = make_env(env_id, seed) rewards = 0 obs = env.reset() print("observation shape:", env.observation_space.shape) @@ -15,39 +29,44 @@ def example_dmc(env_name="fish-swim", seed=1, iterations=1000): obs, reward, done, info = env.step(ac) rewards += reward - env.render("human") + if render: + env.render("human") if done: - print(env_name, rewards) + print(env_id, rewards) rewards = 0 obs = env.reset() env.close() -def example_custom_dmc_and_mp(seed=1): +def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): """ - Example for running a custom motion primitive based environments based off of a dmc task. - Our already registered environments follow the same structure, but do not directly allow for modifications. - Hence, this also allows to adjust hyperparameters of the motion primitives more easily. + Example for running a custom motion primitive based environments. + Our already registered environments follow the same structure. + Hence, this also allows to adjust hyperparameters of the motion primitives. + Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks. We appreciate PRs for custom environments (especially MP wrappers of existing tasks) for our repo: https://github.com/ALRhub/alr_envs/ Args: - seed: seed + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode Returns: """ + # Base DMC name, according to structure of above example base_env = "ball_in_cup-catch" + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # You can also add other gym.Wrappers in case they are needed. - # wrappers = [HoleReacherMPWrapper] wrappers = [DMCBallInCupMPWrapper] mp_kwargs = { "num_dof": 2, # env.start_pos "num_basis": 5, - "duration": 2, + "duration": 20, "learn_goal": True, "alpha_phase": 2, "bandwidth_factor": 2, @@ -57,14 +76,21 @@ def example_custom_dmc_and_mp(seed=1): } env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) # OR for a deterministic ProMP: - # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, **mp_args) + # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) + + # This renders the full MP trajectory + # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + # Resetting to no rendering, can be achieved by render(mode=None). + # It is also possible to change them mode multiple times when + # e.g. only every nth trajectory should be displayed. + if render: + env.render(mode="human") rewards = 0 obs = env.reset() - env.render("human") # number of samples/full trajectories (multiple environment steps) - for i in range(10): + for i in range(iterations): ac = env.action_space.sample() obs, reward, done, info = env.step(ac) rewards += reward @@ -85,14 +111,14 @@ if __name__ == '__main__': # export MUJOCO_GL="osmesa" # Standard DMC Suite tasks - example_dmc("fish-swim", seed=10, iterations=100) + example_dmc("fish-swim", seed=10, iterations=1000, render=True) # Manipulation tasks - # The vision versions are currently not integrated - example_dmc("manipulation-reach_site_features", seed=10, iterations=100) + # Disclaimer: The vision versions are currently not integrated and yield an error + example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=True) # Gym + DMC hybrid task provided in the MP framework - example_dmc("dmc_ball_in_cup_dmp-v0", seed=10, iterations=10) + example_dmc("dmc_ball_in_cup-catch_detpmp-v0", seed=10, iterations=1, render=True) # Custom DMC task - example_custom_dmc_and_mp() + example_custom_dmc_and_mp(seed=10, iterations=1, render=True) diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py index e602da6..99ca8f6 100644 --- a/alr_envs/examples/examples_general.py +++ b/alr_envs/examples/examples_general.py @@ -4,14 +4,23 @@ from collections import defaultdict import gym import numpy as np -from alr_envs.utils.make_env_helpers import make_env +from alr_envs.utils.make_env_helpers import make_env, make_env_rank from alr_envs.utils.mp_env_async_sampler import AlrContextualMpEnvSampler, AlrMpEnvSampler, DummyDist -def example_general(env_id: str, seed=1, iterations=1000): +def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True): """ Example for running any env in the step based setting. This also includes DMC environments when leveraging our custom make_env function. + + Args: + env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + """ env = make_env(env_id, seed) @@ -25,7 +34,7 @@ def example_general(env_id: str, seed=1, iterations=1000): obs, reward, done, info = env.step(env.action_space.sample()) rewards += reward - if i % 1 == 0: + if render: env.render() if done: @@ -34,36 +43,60 @@ def example_general(env_id: str, seed=1, iterations=1000): obs = env.reset() -def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)): - def sample(env: gym.vector.VectorEnv, n_samples=100): - # for plotting - rewards = np.zeros(n_cpu) +def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800): + """ + Example for running any env in a vectorized multiprocessing setting to generate more samples faster. + This also includes DMC and DMP environments when leveraging our custom make_env function. + Be aware, increasing the number of environments reduces the total length of the individual episodes. - # this would generate more samples than requested if n_samples % num_envs != 0 - repeat = int(np.ceil(n_samples / env.num_envs)) - vals = defaultdict(list) - for i in range(repeat): - obs, reward, done, info = envs.step(envs.action_space.sample()) - vals['obs'].append(obs) - vals['reward'].append(reward) - vals['done'].append(done) - vals['info'].append(info) - rewards += reward - if np.any(done): - print(rewards[done]) - rewards[done] = 0 + Args: + env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks + seed: seed for deterministic behaviour + n_cpu: Number of cpus cores to use in parallel + n_samples: number of samples generated in total by all environments. - # do not return values above threshold - return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),) + Returns: Tuple of (obs, reward, done, info) with type np.ndarray - from alr_envs.utils.make_env_helpers import make_env_rank - envs = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)]) + """ + env = gym.vector.AsyncVectorEnv([make_env_rank(env_id, seed, i) for i in range(n_cpu)]) + # OR # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)]) - obs = envs.reset() - print(sample(envs, 16)) + # for plotting + rewards = np.zeros(n_cpu) + buffer = defaultdict(list) + + obs = env.reset() + + # this would generate more samples than requested if n_samples % num_envs != 0 + repeat = int(np.ceil(n_samples / env.num_envs)) + for i in range(repeat): + obs, reward, done, info = env.step(env.action_space.sample()) + buffer['obs'].append(obs) + buffer['reward'].append(reward) + buffer['done'].append(done) + buffer['info'].append(info) + rewards += reward + if np.any(done): + print(f"Reward at iteration {i}: {rewards[done]}") + rewards[done] = 0 + + # do not return values above threshold + return *map(lambda v: np.stack(v)[:n_samples], buffer.values()), if __name__ == '__main__': - # Mujoco task from framework - example_general("alr_envs:ALRReacher-v0") + # Basic gym task + # example_general("Pendulum-v0", seed=10, iterations=200, render=True) + # + # # Basis task from framework + # example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=True) + # + # # OpenAI Mujoco task + # example_general("HalfCheetah-v2", seed=10, render=True) + # + # # Mujoco task from framework + # example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=True) + + # Vectorized multiprocessing environments + example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py index f3de01c..f374a0d 100644 --- a/alr_envs/examples/examples_motion_primitives.py +++ b/alr_envs/examples/examples_motion_primitives.py @@ -2,12 +2,14 @@ from alr_envs import HoleReacherMPWrapper from alr_envs.utils.make_env_helpers import make_dmp_env, make_env -def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1): +def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True): """ Example for running a motion primitive based environment, which is already registered Args: env_name: DMP env_id - seed: seed + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode Returns: @@ -16,44 +18,81 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1): # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface. env = make_env(env_name, seed) - # Changing the mp_kwargs is possible by providing them to gym. - # E.g. here by providing way to many basis functions - # mp_kwargs = { - # "num_dof": 5, - # "num_basis": 1000, - # "duration": 2, - # "learn_goal": True, - # "alpha_phase": 2, - # "bandwidth_factor": 2, - # "policy_type": "velocity", - # "weights_scale": 50, - # "goal_scale": 0.1 - # } - # env = make_env(env_name, seed, mp_kwargs=mp_kwargs) - rewards = 0 # env.render(mode=None) obs = env.reset() # number of samples/full trajectories (multiple environment steps) - for i in range(10): + for i in range(iterations): + + if render and i % 2 == 0: + # This renders the full MP trajectory + # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + # Resetting to no rendering, can be achieved by render(mode=None). + # It is also possible to change the mode multiple times when + # e.g. only every second trajectory should be displayed, such as here + # Just make sure the correct mode is set before executing the step. + env.render(mode="human") + else: + env.render(mode=None) + ac = env.action_space.sample() obs, reward, done, info = env.step(ac) rewards += reward - if i % 1 == 0: - # render full DMP trajectory - # render can only be called once in the beginning as well. That would render every trajectory - # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering. - env.render(mode="human") - if done: print(rewards) rewards = 0 obs = env.reset() -def example_custom_mp(seed=1): +def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True): + """ + Example for running a motion primitive based environment, which is already registered + Args: + env_name: DMP env_id + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + # Changing the mp_kwargs is possible by providing them to gym. + # E.g. here by providing way to many basis functions + mp_kwargs = { + "num_dof": 5, + "num_basis": 1000, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + env = make_env(env_name, seed, mp_kwargs=mp_kwargs) + + # This time rendering every trajectory + if render: + env.render(mode="human") + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_fully_custom_mp(seed=1, iterations=1, render=True): """ Example for running a custom motion primitive based environments. Our already registered environments follow the same structure. @@ -63,12 +102,15 @@ def example_custom_mp(seed=1): for our repo: https://github.com/ALRhub/alr_envs/ Args: seed: seed + iterations: Number of rollout steps to run + render: Render the episode Returns: """ base_env = "alr_envs:HoleReacher-v1" + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. # You can also add other gym.Wrappers in case they are needed. wrappers = [HoleReacherMPWrapper] @@ -85,19 +127,16 @@ def example_custom_mp(seed=1): } env = make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) # OR for a deterministic ProMP: - # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed) + # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) + + if render: + env.render(mode="human") rewards = 0 - # render full DMP trajectory - # It is only required to call render() once in the beginning, which renders every consecutive trajectory. - # Resetting to no rendering, can be achieved by render(mode=None). - # It is also possible to change them mode multiple times when - # e.g. only every nth trajectory should be displayed. - env.render(mode="human") obs = env.reset() # number of samples/full trajectories (multiple environment steps) - for i in range(10): + for i in range(iterations): ac = env.action_space.sample() obs, reward, done, info = env.step(ac) rewards += reward @@ -110,10 +149,13 @@ def example_custom_mp(seed=1): if __name__ == '__main__': # DMP - example_mp("alr_envs:HoleReacherDMP-v1") + example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=True) # DetProMP - example_mp("alr_envs:HoleReacherDetPMP-v1") + example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=True) - # Custom DMP - example_custom_mp() + # Altered basis functions + example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=True) + + # Custom MP + example_fully_custom_mp(seed=10, iterations=1, render=True) diff --git a/alr_envs/mujoco/alr_mujoco_env.py b/alr_envs/mujoco/alr_mujoco_env.py deleted file mode 100644 index 01384c2..0000000 --- a/alr_envs/mujoco/alr_mujoco_env.py +++ /dev/null @@ -1,270 +0,0 @@ -from collections import OrderedDict -import os -from abc import abstractmethod - - -from gym import error, spaces -from gym.utils import seeding -import numpy as np -from os import path - -from alr_envs.utils.mps.alr_env import AlrEnv -from alr_envs.utils.positional_env import PositionalEnv - -try: - import mujoco_py -except ImportError as e: - raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e)) - -DEFAULT_SIZE = 500 - - -def convert_observation_to_space(observation): - if isinstance(observation, dict): - space = spaces.Dict(OrderedDict([ - (key, convert_observation_to_space(value)) - for key, value in observation.items() - ])) - elif isinstance(observation, np.ndarray): - low = np.full(observation.shape, -float('inf'), dtype=np.float32) - high = np.full(observation.shape, float('inf'), dtype=np.float32) - space = spaces.Box(low, high, dtype=observation.dtype) - else: - raise NotImplementedError(type(observation), observation) - - return space - - -class AlrMujocoEnv(PositionalEnv, AlrEnv): - """ - Superclass for all MuJoCo environments. - """ - - def __init__(self, model_path, n_substeps, apply_gravity_comp=True): - """ - - Args: - model_path: path to xml file - n_substeps: how many steps mujoco does per call to env.step - apply_gravity_comp: Whether gravity compensation should be active - """ - if model_path.startswith("/"): - fullpath = model_path - else: - fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) - if not path.exists(fullpath): - raise IOError("File %s does not exist" % fullpath) - self.n_substeps = n_substeps - self.apply_gravity_comp = apply_gravity_comp - self.model = mujoco_py.load_model_from_path(fullpath) - self.sim = mujoco_py.MjSim(self.model, nsubsteps=n_substeps) - self.data = self.sim.data - self.viewer = None - self._viewers = {} - - self.metadata = { - 'render.modes': ['human', 'rgb_array', 'depth_array'], - 'video.frames_per_second': int(np.round(1.0 / self.dt)) - } - - self.init_qpos = self.sim.data.qpos.ravel().copy() - self.init_qvel = self.sim.data.qvel.ravel().copy() - - self._start_pos = None - self._start_vel = None - - self._set_action_space() - - observation = self._get_obs() # TODO: is calling get_obs enough? should we call reset, or even step? - - self._set_observation_space(observation) - - self.seed() - - @property - def current_pos(self): - """ - By default returns the joint positions of all simulated objects. May be overridden in subclass. - """ - return self.sim.data.qpos - - @property - def current_vel(self): - """ - By default returns the joint velocities of all simulated objects. May be overridden in subclass. - """ - return self.sim.data.qvel - - @property - def start_pos(self): - """ - Start position of the agent, for example joint angles of a Panda robot. Necessary for MP wrapped simple_reacher. - """ - return self._start_pos - - @property - def start_vel(self): - """ - Start velocity of the agent. Necessary for MP wrapped simple_reacher. - """ - return self._start_vel - - def extend_des_pos(self, des_pos): - """ - In a simplified environment, the actions may only control a subset of all the joints. - Extend the trajectory to match the environments full action space - Args: - des_pos: - - Returns: - - """ - pass - - def extend_des_vel(self, des_vel): - pass - - def _set_action_space(self): - bounds = self.model.actuator_ctrlrange.copy().astype(np.float32) - low, high = bounds.T - self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) - return self.action_space - - def _set_observation_space(self, observation): - self.observation_space = convert_observation_to_space(observation) - return self.observation_space - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - # methods to override: - # ---------------------------- - - @property - @abstractmethod - def active_obs(self): - """Returns boolean mask for each observation entry - whether the observation is returned for the contextual case or not. - This effectively allows to filter unwanted or unnecessary observations from the full step-based case. - """ - return np.ones(self.observation_space.shape, dtype=bool) - - def _get_obs(self): - """Returns the observation. - """ - raise NotImplementedError() - - def reset_model(self): - """ - Reset the robot degrees of freedom (qpos and qvel). - Implement this in each subclass. - """ - raise NotImplementedError - - def viewer_setup(self): - """ - This method is called when the viewer is initialized. - Optionally implement this method, if you need to tinker with camera position - and so forth. - """ - pass - - # ----------------------------- - - def reset(self): - self.sim.reset() - ob = self.reset_model() - return ob - - def set_state(self, qpos, qvel): - assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) - old_state = self.sim.get_state() - new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel, - old_state.act, old_state.udd_state) - self.sim.set_state(new_state) - self.sim.forward() - - @property - def dt(self): - return self.model.opt.timestep * self.n_substeps - - def do_simulation(self, ctrl): - """ - Additionally returns whether there was an error while stepping the simulation - """ - error_in_sim = False - num_actuations = len(ctrl) - if self.apply_gravity_comp: - self.sim.data.ctrl[:num_actuations] = ctrl + self.sim.data.qfrc_bias[:num_actuations].copy() / self.model.actuator_gear[:, 0] - else: - self.sim.data.ctrl[:num_actuations] = ctrl - - try: - self.sim.step() - except mujoco_py.builder.MujocoException: - error_in_sim = True - - return error_in_sim - - def render(self, - mode='human', - width=DEFAULT_SIZE, - height=DEFAULT_SIZE, - camera_id=None, - camera_name=None): - if mode == 'rgb_array' or mode == 'depth_array': - if camera_id is not None and camera_name is not None: - raise ValueError("Both `camera_id` and `camera_name` cannot be" - " specified at the same time.") - - no_camera_specified = camera_name is None and camera_id is None - if no_camera_specified: - camera_name = 'track' - - if camera_id is None and camera_name in self.model._camera_name2id: - camera_id = self.model.camera_name2id(camera_name) - - self._get_viewer(mode).render(width, height, camera_id=camera_id) - - if mode == 'rgb_array': - # window size used for old mujoco-py: - data = self._get_viewer(mode).read_pixels(width, height, depth=False) - # original image is upside-down, so flip it - return data[::-1, :, :] - elif mode == 'depth_array': - self._get_viewer(mode).render(width, height) - # window size used for old mujoco-py: - # Extract depth part of the read_pixels() tuple - data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1] - # original image is upside-down, so flip it - return data[::-1, :] - elif mode == 'human': - self._get_viewer(mode).render() - - def close(self): - if self.viewer is not None: - # self.viewer.finish() - self.viewer = None - self._viewers = {} - - def _get_viewer(self, mode): - self.viewer = self._viewers.get(mode) - if self.viewer is None: - if mode == 'human': - self.viewer = mujoco_py.MjViewer(self.sim) - elif mode == 'rgb_array' or mode == 'depth_array': - self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) - - self.viewer_setup() - self._viewers[mode] = self.viewer - return self.viewer - - def get_body_com(self, body_name): - return self.data.get_body_xpos(body_name) - - def state_vector(self): - return np.concatenate([ - self.sim.data.qpos.flat, - self.sim.data.qvel.flat - ]) diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py index 8f26901..f870877 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py @@ -1,10 +1,11 @@ from gym import utils import os import numpy as np -from alr_envs.mujoco import alr_mujoco_env +from gym.envs.mujoco import MujocoEnv -class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): + +class ALRBallInACupEnv(MujocoEnv, utils.EzPickle): def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False, reward_type: str = None, context: np.ndarray = None): utils.EzPickle.__init__(**locals()) diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py index 9393421..321358a 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py @@ -1,8 +1,8 @@ -from typing import Union +from typing import Tuple, Union import numpy as np -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper class BallInACupMPWrapper(MPEnvWrapper): @@ -24,6 +24,14 @@ class BallInACupMPWrapper(MPEnvWrapper): else: return self._start_pos + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qpos[0:7].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qvel[0:7].copy() + @property def goal_pos(self): # TODO: @Max I think the default value of returning to the start is reasonable here diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py deleted file mode 100644 index d1ba696..0000000 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_positional_wrapper.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Tuple, Union - -import numpy as np - -from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper - - -class BallInACupPositionalWrapper(PositionalEnvWrapper): - @property - def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: - return self.sim.data.qpos[0:7].copy() - - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return self.sim.data.qvel[0:7].copy() diff --git a/alr_envs/mujoco/beerpong/beerpong.py b/alr_envs/mujoco/beerpong/beerpong.py index 5efc431..1f3f781 100644 --- a/alr_envs/mujoco/beerpong/beerpong.py +++ b/alr_envs/mujoco/beerpong/beerpong.py @@ -1,11 +1,15 @@ -from gym import utils import os + import numpy as np -from alr_envs.mujoco import alr_mujoco_env +from gym import utils +from gym.envs.mujoco import MujocoEnv -class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): - def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): + +class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): + def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None): + utils.EzPickle.__init__(**locals()) + MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip) self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", @@ -25,12 +29,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) self.context = None - - utils.EzPickle.__init__(self) - alr_mujoco_env.AlrMujocoEnv.__init__(self, - self.xml_path, - apply_gravity_comp=apply_gravity_comp, - n_substeps=n_substeps) + # alr_mujoco_env.AlrMujocoEnv.__init__(self, + # self.xml_path, + # apply_gravity_comp=apply_gravity_comp, + # n_substeps=n_substeps) self.sim_time = 8 # seconds self.sim_steps = int(self.sim_time / self.dt) diff --git a/alr_envs/mujoco/beerpong/beerpong_simple.py b/alr_envs/mujoco/beerpong/beerpong_simple.py index 7a0908d..5f6f6d2 100644 --- a/alr_envs/mujoco/beerpong/beerpong_simple.py +++ b/alr_envs/mujoco/beerpong/beerpong_simple.py @@ -1,11 +1,13 @@ from gym import utils import os import numpy as np -from alr_envs.mujoco import alr_mujoco_env +from gym.envs.mujoco import MujocoEnv -class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): +class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): + utils.EzPickle.__init__(**locals()) + self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", @@ -26,11 +28,12 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): self.context = None - utils.EzPickle.__init__(self) - alr_mujoco_env.AlrMujocoEnv.__init__(self, - self.xml_path, - apply_gravity_comp=apply_gravity_comp, - n_substeps=n_substeps) + MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps) + + # alr_mujoco_env.AlrMujocoEnv.__init__(self, + # self.xml_path, + # apply_gravity_comp=apply_gravity_comp, + # n_substeps=n_substeps) self.sim_time = 8 # seconds self.sim_steps = int(self.sim_time / self.dt) diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py index 1eb4e9e..6bdf5ec 100644 --- a/alr_envs/utils/__init__.py +++ b/alr_envs/utils/__init__.py @@ -1,22 +1,24 @@ +import collections import re +from typing import Union import gym from gym.envs.registration import register def make( - id, - seed=1, - visualize_reward=True, - from_pixels=False, - height=84, - width=84, - camera_id=0, - frame_skip=1, - episode_length=1000, - environment_kwargs=None, - time_limit=None, - channels_first=True + id: str, + seed: int = 1, + visualize_reward: bool = True, + from_pixels: bool = False, + height: int = 84, + width: int = 84, + camera_id: int = 0, + frame_skip: int = 1, + episode_length: Union[None, int] = None, + environment_kwargs: dict = {}, + time_limit: Union[None, float] = None, + channels_first: bool = True ): # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py # License: MIT @@ -31,12 +33,16 @@ def make( assert not visualize_reward, 'cannot use visualize reward when learning from pixels' # shorten episode length + if episode_length is None: + # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250 + episode_length = 250 if domain_name == "manipulation" else 1000 + max_episode_steps = (episode_length + frame_skip - 1) // frame_skip if env_id not in gym.envs.registry.env_specs: - task_kwargs = {} + task_kwargs = {'random': seed} # if seed is not None: - task_kwargs['random'] = seed + # task_kwargs['random'] = seed if time_limit is not None: task_kwargs['time_limit'] = time_limit register( diff --git a/alr_envs/utils/dmc2gym_wrapper.py b/alr_envs/utils/dmc2gym_wrapper.py index b575db7..d1226e4 100644 --- a/alr_envs/utils/dmc2gym_wrapper.py +++ b/alr_envs/utils/dmc2gym_wrapper.py @@ -1,6 +1,7 @@ # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py # License: MIT # Copyright (c) 2020 Denis Yarats +import collections from typing import Any, Dict, Tuple import numpy as np @@ -31,12 +32,21 @@ def _spec_to_box(spec): return spaces.Box(low, high, dtype=np.float32) -def _flatten_obs(obs): - obs_pieces = [] - for v in obs.values(): - flat = np.array([v]) if np.isscalar(v) else v.ravel() - obs_pieces.append(flat) - return np.concatenate(obs_pieces, axis=0) +def _flatten_obs(obs: collections.MutableMapping): + # obs_pieces = [] + # for v in obs.values(): + # flat = np.array([v]) if np.isscalar(v) else v.ravel() + # obs_pieces.append(flat) + # return np.concatenate(obs_pieces, axis=0) + + if not isinstance(obs, collections.MutableMapping): + raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.') + + # Keep key order consistent for non OrderedDicts + keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys()) + + obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys] + return np.concatenate(obs_vals) class DMCWrapper(core.Env): @@ -75,7 +85,7 @@ class DMCWrapper(core.Env): self._action_space = _spec_to_box([self._env.action_spec()]) self._observation_space = _spec_to_box(self._env.observation_spec().values()) - self._last_observation = None + self._last_state = None self.viewer = None # set seed @@ -107,6 +117,10 @@ class DMCWrapper(core.Env): def action_space(self): return self._action_space + @property + def dt(self): + return self._env.control_timestep() * self._frame_skip + def seed(self, seed=None): self._action_space.seed(seed) self._observation_space.seed(seed) @@ -123,19 +137,19 @@ class DMCWrapper(core.Env): if done: break - self._last_observation = _flatten_obs(time_step.observation) + self._last_state = _flatten_obs(time_step.observation) obs = self._get_obs(time_step) extra['discount'] = time_step.discount return obs, reward, done, extra def reset(self) -> np.ndarray: time_step = self._env.reset() - self._last_observation = _flatten_obs(time_step.observation) + self._last_state = _flatten_obs(time_step.observation) obs = self._get_obs(time_step) return obs def render(self, mode='rgb_array', height=None, width=None, camera_id=0): - if self._last_observation is None: + if self._last_state is None: raise ValueError('Environment not ready to render. Call reset() first.') # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index 8003640..6a8fce2 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -3,7 +3,7 @@ from typing import Iterable, List, Type import gym -from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper +from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper @@ -32,7 +32,7 @@ def make_env_rank(env_id: str, seed: int, rank: int = 0): def make_env(env_id: str, seed, **kwargs): """ Converts an env_id to an environment with the gym API. - This also works for DeepMind Control Suite env_wrappers + This also works for DeepMind Control Suite interface_wrappers for which domain name and task name are expected to be separated by "-". Args: env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks @@ -42,7 +42,7 @@ def make_env(env_id: str, seed, **kwargs): """ try: - # Add seed to kwargs in case it is a predefined dmc environment. + # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment. if env_id.startswith("dmc"): kwargs.update({"seed": seed})