fancy_gym/alr_envs/examples/examples_metaworld.py

import alr_envs


def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
    """
    Example for running a MetaWorld based env in the step based setting.
    The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
    return the observable goal version.
    All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/

    A random action is sampled from the action space at every step. Whenever an episode
    finishes, the env id and the reward accumulated over that episode are printed and the
    environment is reset.

    NOTE(review): the default `env_id` ("fish-swim") does not follow the `task_name-v2` form
    described above; it is kept unchanged for backward compatibility. Pass a MetaWorld id
    such as "button-press-v2" explicitly.

    Args:
        env_id: `task_name-v2`
        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:
        None
    """
    env = alr_envs.make(env_id, seed)
    # Make sure the environment is closed even when a step/render call fails.
    try:
        rewards = 0
        env.reset()
        print("observation shape:", env.observation_space.shape)
        print("action shape:", env.action_space.shape)

        for _ in range(iterations):
            ac = env.action_space.sample()
            _, reward, done, _ = env.step(ac)
            rewards += reward

            if render:
                # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
                # TODO: Remove this, when Metaworld fixes its interface.
                env.render(False)

            if done:
                # Episode finished: report its return and start a new one.
                print(env_id, rewards)
                rewards = 0
                env.reset()
    finally:
        env.close()


def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
    """
    Example for running a custom motion primitive (MP) based environment.
    Our already registered environments follow the same structure.
    Hence, this also allows adjusting the hyperparameters of the motion primitives.
    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    A random action (one set of MP parameters) is sampled per iteration. Whenever an episode
    finishes, the base env id and the accumulated reward are printed and the environment is reset.

    Args:
        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
        iterations: Number of samples/full trajectories to run (each one spans multiple environment steps)
        render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)

    Returns:
        None

    Raises:
        ValueError: if `render` is truthy, because rendering is currently not supported here.
    """

    # Rendering is currently unsupported. Fail before the environment is constructed so that
    # no environment is created (and left unclosed) just to raise afterwards.
    if render:
        # TODO: Remove this, when Metaworld fixes its interface.
        # Once supported, a single `env.render(mode="human")` call before the rollout renders every
        # consecutive trajectory; `render(mode=None)` switches rendering off again. The mode may be
        # changed multiple times, e.g. when only every nth trajectory should be displayed.
        raise ValueError("Metaworld render interface bug does not allow to render() until Metaworld fixes its "
                         "interface. "
                         "A temporary workaround is to alter their code in MujocoEnv render() from "
                         "`if not offscreen` to `if not offscreen or offscreen == 'human'`.")

    # Base MetaWorld name, according to structure of above example
    base_env = "button-press-v2"

    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
    # You can also add other gym.Wrappers in case they are needed.
    wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]
    mp_kwargs = {
        "num_dof": 4,  # degrees of freedom a.k.a. the old action space dimensionality
        "num_basis": 5,  # number of basis functions, the new action space has size num_dof x num_basis
        "duration": 6.25,  # length of trajectory in s, number of steps = duration / dt
        "post_traj_time": 0,  # pad trajectory with additional zeros at the end (recommended: 0)
        "width": 0.025,  # width of the basis functions
        "zero_start": True,  # start from current environment position if True
        "weights_scale": 1,  # scaling of MP weights
        "policy_type": "metaworld",  # custom tracking_controller type for metaworld environments
    }

    env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
    # OR for a DMP (other traj_gen_kwargs are required, see dmc_examples):
    # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs, **kwargs)

    # Make sure the environment is closed even when the rollout fails.
    try:
        rewards = 0
        env.reset()

        # number of samples/full trajectories (multiple environment steps)
        for _ in range(iterations):
            ac = env.action_space.sample()
            _, reward, done, _ = env.step(ac)
            rewards += reward

            if done:
                # Episode finished: report its return and start a new one.
                print(base_env, rewards)
                rewards = 0
                env.reset()
    finally:
        env.close()


if __name__ == '__main__':
    # Disclaimer: the seed of a MetaWorld environment has to be given at creation time.
    # Changing it later via env.seed() is not recommended, since it may not affect the underlying behavior.

    # Rendering may require pointing to your OpenGL installation, e.g.
    # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
    render = False

    # Step-based MetaWorld task
    example_dmc(env_id="button-press-v2", seed=10, iterations=500, render=render)

    # MP + MetaWorld hybrid task provided in our framework
    example_dmc(env_id="ButtonPressProMP-v2", seed=10, iterations=1, render=render)

    # Custom MetaWorld task with user-defined MP settings
    example_custom_dmc_and_mp(seed=10, iterations=1, render=render)
metaworld examples 2021-08-20 14:38:23 +02:00			`import alr_envs`


			`def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):`
			`"""`
			`Example for running a MetaWorld based env in the step based setting.`
			The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
			`return the observable goal version.`
			`All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/`

			`Args:`
			env_id: `task_name-v2`
			`seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)`
			`iterations: Number of rollout steps to run`
			`render: Render the episode`

			`Returns:`

			`"""`
			`env = alr_envs.make(env_id, seed)`
			`rewards = 0`
			`obs = env.reset()`
			`print("observation shape:", env.observation_space.shape)`
			`print("action shape:", env.action_space.shape)`

			`for i in range(iterations):`
			`ac = env.action_space.sample()`
			`obs, reward, done, info = env.step(ac)`
			`rewards += reward`

			`if render:`
			`# THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM`
			`# TODO: Remove this, when Metaworld fixes its interface.`
			`env.render(False)`

			`if done:`
			`print(env_id, rewards)`
			`rewards = 0`
			`obs = env.reset()`

			`env.close()`
			`del env`


			`def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):`
			`"""`
			`Example for running a custom motion primitive based environments.`
			`Our already registered environments follow the same structure.`
			`Hence, this also allows to adjust hyperparameters of the motion primitives.`
			`Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.`
			`We appreciate PRs for custom environments (especially MP wrappers of existing tasks)`
			`for our repo: https://github.com/ALRhub/alr_envs/`
			`Args:`
			`seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)`
			`iterations: Number of rollout steps to run`
			`render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)`

			`Returns:`

			`"""`

			`# Base MetaWorld name, according to structure of above example`
			`base_env = "button-press-v2"`

			`# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.`
			`# You can also add other gym.Wrappers in case they are needed.`
added more documentation 2021-08-23 17:24:55 +02:00			`wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper]`
metaworld examples 2021-08-20 14:38:23 +02:00			`mp_kwargs = {`
added more documentation 2021-08-23 17:24:55 +02:00			`"num_dof": 4, # degrees of fredom a.k.a. the old action space dimensionality`
			`"num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis`
			`"duration": 6.25, # length of trajectory in s, number of steps = duration / dt`
			`"post_traj_time": 0, # pad trajectory with additional zeros at the end (recommended: 0)`
			`"width": 0.025, # width of the basis functions`
			`"zero_start": True, # start from current environment position if True`
			`"weights_scale": 1, # scaling of MP weights`
restructuring 2022-06-29 09:37:18 +02:00			`"policy_type": "metaworld", # custom tracking_controller type for metaworld environments`
metaworld examples 2021-08-20 14:38:23 +02:00			`}`

replaced all detpmp with promp 2021-11-30 16:11:32 +01:00			`env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)`
restructuring 2022-06-29 09:37:18 +02:00			`# OR for a DMP (other traj_gen_kwargs are required, see dmc_examples):`
			`# env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs, **kwargs)`
metaworld examples 2021-08-20 14:38:23 +02:00
			`# This renders the full MP trajectory`
			`# It is only required to call render() once in the beginning, which renders every consecutive trajectory.`
			`# Resetting to no rendering, can be achieved by render(mode=None).`
			`# It is also possible to change them mode multiple times when`
			`# e.g. only every nth trajectory should be displayed.`
			`if render:`
			`raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. "`
			`"A temporary workaround is to alter their code in MujocoEnv render() from "`
			"`if not offscreen` to `if not offscreen or offscreen == 'human'`.")
			`# TODO: Remove this, when Metaworld fixes its interface.`
			`# env.render(mode="human")`

			`rewards = 0`
			`obs = env.reset()`

			`# number of samples/full trajectories (multiple environment steps)`
			`for i in range(iterations):`
			`ac = env.action_space.sample()`
			`obs, reward, done, info = env.step(ac)`
			`rewards += reward`

			`if done:`
			`print(base_env, rewards)`
			`rewards = 0`
			`obs = env.reset()`

			`env.close()`
			`del env`


			`if __name__ == '__main__':`
			`# Disclaimer: MetaWorld environments require the seed to be specified in the beginning.`
			`# Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.`

			`# For rendering it might be necessary to specify your OpenGL installation`
			`# export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so`
			`render = False`

			`# # Standard DMC Suite tasks`
			`example_dmc("button-press-v2", seed=10, iterations=500, render=render)`

			`# MP + MetaWorld hybrid task provided in the our framework`
replaced all detpmp with promp 2021-11-30 16:11:32 +01:00			`example_dmc("ButtonPressProMP-v2", seed=10, iterations=1, render=render)`
metaworld examples 2021-08-20 14:38:23 +02:00
			`# Custom MetaWorld task`
			`example_custom_dmc_and_mp(seed=10, iterations=1, render=render)`