# fancy_gym/alr_envs/examples/examples_movement_primitives.py

import alr_envs
def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True):
"""
2022-07-12 10:06:38 +02:00
Example for running a black box based environment, which is already registered
Args:
2022-07-12 10:06:38 +02:00
env_name: Black box env_id
seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns:
"""
2022-07-12 10:06:38 +02:00
# Equivalent to gym, we have make function which can be used to create environments.
# It takes care of seeding and enables the use of a variety of external environments using the gym interface.
env = alr_envs.make(env_name, seed)
rewards = 0
# env.render(mode=None)
obs = env.reset()
# number of samples/full trajectories (multiple environment steps)
for i in range(iterations):
if render and i % 2 == 0:
# This renders the full MP trajectory
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
# Resetting to no rendering, can be achieved by render(mode=None).
# It is also possible to change the mode multiple times when
# e.g. only every second trajectory should be displayed, such as here
# Just make sure the correct mode is set before executing the step.
env.render(mode="human")
else:
env.render(mode=None)
2022-07-12 10:06:38 +02:00
# Now the action space is not the raw action but the parametrization of the trajectory generator,
# such as a ProMP
ac = env.action_space.sample()
2022-07-12 10:06:38 +02:00
# This executes a full trajectory
obs, reward, done, info = env.step(ac)
2022-07-12 10:06:38 +02:00
# Aggregated reward
rewards += reward
if done:
print(rewards)
rewards = 0
obs = env.reset()
def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True):
"""
Example for running a motion primitive based environment, which is already registered
Args:
env_name: DMP env_id
seed: seed for deterministic behaviour
iterations: Number of rollout steps to run
render: Render the episode
Returns:
"""
2022-07-06 09:05:35 +02:00
# Changing the arguments of the black box env is possible by providing them to gym as with all kwargs.
# E.g. here for way to many basis functions
2022-07-11 16:18:18 +02:00
# env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
env = alr_envs.make(env_name, seed)
2022-06-30 14:08:54 +02:00
# mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
# mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})
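    # A minimal sketch, assuming alr_envs.make forwards 'black_box_kwargs' in the same way as
    # 'basis_generator_kwargs' above (illustrative only, not executed here):
    # env = alr_envs.make(env_name, seed, black_box_kwargs={'learn_sub_trajectories': True})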
    rewards = 0
    obs = env.reset()

    # This time rendering every trajectory
    if render:
        env.render(mode="human")

    # number of samples/full trajectories (multiple environment steps)
    for i in range(iterations):
        ac = env.action_space.sample()
        obs, reward, done, info = env.step(ac)
        rewards += reward

        if done:
            print(i, rewards)
            rewards = 0
            obs = env.reset()

    return obs
def example_fully_custom_mp(seed=1, iterations=1, render=True):
"""
Example for running a custom motion primitive based environments.
Our already registered environments follow the same structure.
Hence, this also allows to adjust hyperparameters of the motion primitives.
Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks.
We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
for our repo: https://github.com/ALRhub/alr_envs/
Args:
seed: seed
iterations: Number of rollout steps to run
render: Render the episode
Returns:
"""
    base_env = "alr_envs:HoleReacher-v1"

    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
    # You can also add other gym.Wrappers in case they are needed.
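    # A minimal sketch of such a wrapper (hypothetical names; the exact interface is defined by
    # RawInterfaceWrapper, see the registered MPWrapper implementations for the reference signatures):
    #
    # class MyMPWrapper(RawInterfaceWrapper):
    #     @property
    #     def context_mask(self) -> np.ndarray:
    #         # boolean mask selecting the observation entries that form the context
    #         return np.ones(self.env.observation_space.shape[0], dtype=bool)
    #
    #     @property
    #     def current_pos(self) -> np.ndarray:
    #         return self.env.current_joint_positions  # hypothetical attribute of the wrapped env
    #
    #     @property
    #     def current_vel(self) -> np.ndarray:
    #         return self.env.current_joint_velocities  # hypothetical attribute of the wrapped env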
    wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]
    mp_kwargs = {
        "num_dof": 5,
        "num_basis": 5,
        "duration": 2,
        "learn_goal": True,
        "alpha_phase": 2,
        "bandwidth_factor": 2,
        "policy_type": "velocity",
        "weights_scale": 50,
        "goal_scale": 0.1
    }
    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
    # OR for a deterministic ProMP:
    # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs)

    if render:
        env.render(mode="human")

    rewards = 0
    obs = env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for i in range(iterations):
        ac = env.action_space.sample()
        obs, reward, done, info = env.step(ac)
        rewards += reward

        if done:
            print(rewards)
            rewards = 0
            obs = env.reset()
if __name__ == '__main__':
    render = False

    # # DMP
    # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
    #
    # # ProMP
    # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
    #
    # # DetProMP
    # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)

    # Altered basis functions
    obs1 = example_custom_mp("dmc:manipulation-stack_2_bricks_features", seed=10, iterations=250, render=render)
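    # The same works for the registered ALR environments, e.g. the default ProMP reacher of
    # example_custom_mp (a minimal usage sketch, commented out here):
    # obs2 = example_custom_mp("Reacher5dProMP-v0", seed=10, iterations=10, render=render)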
    # Custom MP
    # example_fully_custom_mp(seed=10, iterations=1, render=render)