diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 90c60e4..b1056a2 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -4,6 +4,7 @@ from gym.envs.registration import register
 from alr_envs.classic_control.hole_reacher.hole_reacher_mp_wrapper import HoleReacherMPWrapper
 from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import SimpleReacherMPWrapper
 from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
+from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
@@ -518,6 +519,48 @@ register(
     }
 )
 
+## DMC
+
+register(
+    id='dmc_ball_in_cup_dmp-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper',
+    # max_episode_steps=1,
+    kwargs={
+        "name": "ball_in_cup-catch",
+        "wrappers": [DMCBallInCupMPWrapper],
+        "mp_kwargs": {
+            "num_dof": 2,
+            "num_basis": 5,
+            "duration": 2,
+            "learn_goal": True,
+            "alpha_phase": 2,
+            "bandwidth_factor": 2,
+            "policy_type": "velocity",
+            "weights_scale": 50,
+            "goal_scale": 0.1
+        }
+    }
+)
+
+register(
+    id='dmc_ball_in_cup_detpmp-v0',
+    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
+    kwargs={
+        "name": "ball_in_cup-catch",
+        "wrappers": [DMCBallInCupMPWrapper],
+        "mp_kwargs": {
+            "num_dof": 2,
+            "num_basis": 5,
+            "duration": 2,
+            "width": 0.025,
+            "policy_type": "velocity",
+            "weights_scale": 0.2,
+            "zero_start": True
+        }
+    }
+)
+
+
 # BBO functions
 for dim in [5, 10, 25, 50, 100]:
diff --git a/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py b/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py
index 5be3baf..97959d7 100644
--- a/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py
+++ b/alr_envs/dmc/Ball_in_the_cup_mp_wrapper.py
@@ -5,7 +5,7 @@ import numpy as np
 from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
 
 
-class BallInCupMPWrapper(MPEnvWrapper):
+class DMCBallInCupMPWrapper(MPEnvWrapper):
 
     @property
     def active_obs(self):
diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py
index a55d467..d1a5e8e 100644
--- a/alr_envs/examples/examples_dmc.py
+++ b/alr_envs/examples/examples_dmc.py
@@ -1,14 +1,15 @@
-from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import BallInCupMPWrapper
+from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.utils.make_env_helpers import make_dmp_env, make_env
 
 
-def example_dmc(env_name="fish-swim", seed=1):
+def example_dmc(env_name="fish-swim", seed=1, iterations=1000):
     env = make_env(env_name, seed)
     rewards = 0
     obs = env.reset()
+    print(obs)
 
-    # number of samples/full trajectories (multiple environment steps)
-    for i in range(2000):
+    # number of samples (multiple environment steps)
+    for i in range(iterations):
         ac = env.action_space.sample()
         obs, reward, done, info = env.step(ac)
         rewards += reward
@@ -37,7 +38,7 @@ def example_custom_dmc_and_mp(seed=1):
     # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
     # You can also add other gym.Wrappers in case they are needed.
     # wrappers = [HoleReacherMPWrapper]
-    wrappers = [BallInCupMPWrapper]
+    wrappers = [DMCBallInCupMPWrapper]
     mp_kwargs = {
         "num_dof": 2,  # env.start_pos
         "num_basis": 5,
@@ -69,5 +70,14 @@
 
 
 if __name__ == '__main__':
-    example_dmc()
+    # Disclaimer: DMC environments require the seed to be specified at creation time.
+    # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.
+
+    # Standard DMC task
+    example_dmc("fish-swim", seed=10, iterations=1000)
+
+    # Gym + DMC hybrid task provided in the MP framework
+    example_dmc("dmc_ball_in_cup_dmp-v0", seed=10, iterations=10)
+
+    # Custom DMC task
     example_custom_dmc_and_mp()
diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py
index d3755d7..60a77c1 100644
--- a/alr_envs/examples/examples_motion_primitives.py
+++ b/alr_envs/examples/examples_motion_primitives.py
@@ -15,6 +15,22 @@ def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1):
     # While in this case gym.make() is possible to use as well, we recommend our custom make env function.
     # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface.
     env = make_env(env_name, seed)
+
+    # Changing the mp_kwargs is possible by providing them to gym.
+    # E.g. here by providing far too many basis functions
+    # mp_kwargs = {
+    #     "num_dof": 5,
+    #     "num_basis": 1000,
+    #     "duration": 2,
+    #     "learn_goal": True,
+    #     "alpha_phase": 2,
+    #     "bandwidth_factor": 2,
+    #     "policy_type": "velocity",
+    #     "weights_scale": 50,
+    #     "goal_scale": 0.1
+    # }
+    # env = make_env(env_name, seed, mp_kwargs=mp_kwargs)
+
     rewards = 0
     # env.render(mode=None)
     obs = env.reset()
@@ -40,8 +56,9 @@
 def example_custom_mp(seed=1):
     """
     Example for running a custom motion primitive based environments.
-    Our already registered environments follow the same structure, but do not directly allow for modifications.
-    Hence, this also allows to adjust hyperparameters of the motion primitives more easily.
+    Our already registered environments follow the same structure.
+    Hence, this also allows adjusting the hyperparameters of the motion primitives.
+    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
     We appreciate PRs for custom environments (especially MP wrappers of existing tasks) for our repo: https://github.com/ALRhub/alr_envs/
 
     Args:
diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py
index 5b60ba3..1eb4e9e 100644
--- a/alr_envs/utils/__init__.py
+++ b/alr_envs/utils/__init__.py
@@ -35,8 +35,7 @@ def make(
     if env_id not in gym.envs.registry.env_specs:
         task_kwargs = {}
-        if seed is not None:
-            task_kwargs['random'] = seed
+        task_kwargs['random'] = seed
         if time_limit is not None:
             task_kwargs['time_limit'] = time_limit
         register(
diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py
index 2a3ccf7..63eb8b9 100644
--- a/alr_envs/utils/make_env_helpers.py
+++ b/alr_envs/utils/make_env_helpers.py
@@ -42,6 +42,10 @@ def make_env(env_id: str, seed, **kwargs):
     """
     try:
+        # Add seed to kwargs in case it is a predefined dmc environment.
+        if env_id.startswith("dmc"):
+            kwargs.update({"seed": seed})
+
         # Gym
         env = gym.make(env_id, **kwargs)
         env.seed(seed)
@@ -70,7 +74,7 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
     # _env = gym.make(env_id)
     _env = make_env(env_id, seed, **kwargs)
 
-    assert any(issubclass(w, MPEnvWrapper) for w in wrappers),\
+    assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
         "At least an MPEnvWrapper is required in order to leverage motion primitive environments."
     for w in wrappers:
         _env = w(_env)
@@ -125,7 +129,9 @@ def make_dmp_env_helper(**kwargs):
     Returns: DMP wrapped gym env
     """
-    return make_dmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), **kwargs.get("mp_kwargs"))
+    seed = kwargs.get("seed", None)
+    return make_dmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed,
+                        **kwargs.get("mp_kwargs"))
 
 
 def make_detpmp_env_helper(**kwargs):
@@ -143,7 +149,9 @@ def make_detpmp_env_helper(**kwargs):
     Returns: DMP wrapped gym env
     """
-    return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), **kwargs.get("mp_kwargs"))
+    seed = kwargs.get("seed", None)
+    return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed,
+                           **kwargs.get("mp_kwargs"))
 
 
 def make_contextual_env(env_id, context, seed, rank):
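
For reference, a minimal usage sketch of the environments registered above (not part of the patch; the environment id and make_env come from the hunks in this diff, while the seed and iteration count are arbitrary):

    from alr_envs.utils.make_env_helpers import make_env

    # The seed has to be provided at construction time; for ids starting with
    # "dmc", make_env forwards it to the underlying dm_control task.
    env = make_env("dmc_ball_in_cup_dmp-v0", seed=10)

    obs = env.reset()
    rewards = 0
    for i in range(10):
        # For motion primitive envs, one step executes a full trajectory.
        obs, reward, done, info = env.step(env.action_space.sample())
        rewards += reward
        if done:
            print(rewards)
            rewards = 0
            obs = env.reset()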
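The mp_kwargs override described in examples_motion_primitives.py applies to these DMC ids as well. A sketch, under the assumption that gym's kwargs merge is shallow, so the registered "mp_kwargs" dict is replaced as a whole and every key has to be repeated (the values shown are illustrative, not tuned):

    from alr_envs.utils.make_env_helpers import make_env

    # Full replacement of the registered mp_kwargs; only num_basis differs
    # from the defaults registered above for dmc_ball_in_cup_dmp-v0.
    mp_kwargs = {
        "num_dof": 2,
        "num_basis": 10,
        "duration": 2,
        "learn_goal": True,
        "alpha_phase": 2,
        "bandwidth_factor": 2,
        "policy_type": "velocity",
        "weights_scale": 50,
        "goal_scale": 0.1,
    }
    env = make_env("dmc_ball_in_cup_dmp-v0", seed=10, mp_kwargs=mp_kwargs)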