import alr_envs


def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
    """
    Example for running a DMC based env in the step-based setting.
    The env_id has to be specified as `domain_name-task_name` or,
    for manipulation tasks, as `manipulation-environment_name`.

    Args:
        env_id: Either `domain_name-task_name` or `manipulation-environment_name`
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:

    """
    env = alr_envs.make(env_id, seed)
    rewards = 0
    obs = env.reset()
    print("observation shape:", env.observation_space.shape)
    print("action shape:", env.action_space.shape)

    for i in range(iterations):
        ac = env.action_space.sample()
        obs, reward, done, info = env.step(ac)
        rewards += reward

        if render:
            env.render("human")

        if done:
            print(env_id, rewards)
            rewards = 0
            obs = env.reset()

    env.close()
    del env


def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
    """
    Example for running a custom motion primitive based environment.
    Our already registered environments follow the same structure.
    Hence, this also allows adjusting the hyperparameters of the motion primitives.
    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:

    """
    # Base DMC name, following the structure of the example above
    base_env = "ball_in_cup-catch"

    # Replace this wrapper with the custom wrapper for your environment by inheriting from MPEnvWrapper.
    # You can also add other gym.Wrappers in case they are needed.
    wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
    mp_kwargs = {
        "num_dof": 2,  # degrees of freedom, a.k.a. the old action space dimensionality
        "num_basis": 5,  # number of basis functions, the new action space has size num_dof x num_basis
        "duration": 20,  # length of the trajectory in s, number of steps = duration / dt
        "learn_goal": True,  # learn the goal position (recommended)
        "alpha_phase": 2,
        "bandwidth_factor": 2,
        "policy_type": "motor",  # tracking_controller type: 'velocity', 'position', or 'motor' (torque control)
        "weights_scale": 1,  # scaling of the MP weights
        "goal_scale": 1,  # scaling of the learned goal position
        "policy_kwargs": {  # only required for torque control / PD controller
            "p_gains": 0.2,
            "d_gains": 0.05
        }
    }
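
    # For orientation: with num_dof=2 and num_basis=5 the basis weights alone
    # span num_dof x num_basis = 2 * 5 = 10 action dimensions; with
    # learn_goal=True the learned goal position may add another num_dof = 2
    # entries, depending on the wrapper implementation. A quick check after
    # creating the env below:
    # print(env.action_space.shape)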

    kwargs = {
        "time_limit": 20,  # same as duration value but as max horizon for underlying DMC environment
        "episode_length": 1000,  # corresponding number of episode steps
        # "frame_skip": 1
    }
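
    # Consistency check on the values above: time_limit / episode_length
    # = 20 s / 1000 steps = 0.02 s per step, which matches the
    # "number of steps = duration / dt" relation noted in mp_kwargs
    # (1000 = 20 / 0.02).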

    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs)

    # OR for a deterministic ProMP (other traj_gen_kwargs are required, see metaworld_examples):
    # env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)

    # This renders the full MP trajectory.
    # It is only required to call render() once in the beginning, which then renders every consecutive trajectory.
    # Resetting to no rendering can be achieved by calling render(mode=None).
    # It is also possible to change the mode multiple times, e.g. when
    # only every nth trajectory should be displayed.
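
    # A minimal sketch of that every-nth-trajectory pattern (assuming, as stated
    # above, that render(mode=None) switches rendering off again; n is a
    # hypothetical display interval):
    # n = 10
    # for i in range(iterations):
    #     env.render(mode="human" if i % n == 0 else None)
    #     env.step(env.action_space.sample())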

    if render:
        env.render(mode="human")

    rewards = 0
    obs = env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for i in range(iterations):
        ac = env.action_space.sample()
        obs, reward, done, info = env.step(ac)
        rewards += reward

        if done:
            print(base_env, rewards)
            rewards = 0
            obs = env.reset()

    env.close()
    del env


if __name__ == '__main__':
    # Disclaimer: DMC environments require the seed to be specified at creation.
    # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.

    # For rendering DMC:
    # export MUJOCO_GL="osmesa"
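    # (dm_control also supports MUJOCO_GL="egl" for GPU-accelerated headless
    # rendering and MUJOCO_GL="glfw" for windowed rendering; which backend
    # works depends on the available drivers.)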
    render = False

    # Standard DMC suite tasks
    example_dmc("fish-swim", seed=10, iterations=1000, render=render)

    # Manipulation tasks
    # Disclaimer: The vision versions are currently not integrated and yield an error.
    example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render)

    # Gym + DMC hybrid task provided in the MP framework
    example_dmc("dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render)

    # Custom DMC task
    # Different seed, because the episode is longer for this example and the name + seed combo is already registered above
    example_custom_dmc_and_mp(seed=11, iterations=1, render=render)