2021-08-19 09:30:54 +02:00
|
|
|
import alr_envs
|
2021-06-28 17:25:53 +02:00
|
|
|
|
|
|
|
|
2022-07-12 10:06:38 +02:00
|
|
|
def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True):
    """
    Roll out random samples on an already registered black box based environment.

    One call to ``env.step`` executes a full trajectory, so ``iterations`` counts
    trajectories rather than raw simulator steps.

    Args:
        env_name: Black box env_id
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps (full trajectories) to run
        render: If True, every second trajectory (even iteration index) is rendered

    Returns:
        None. The aggregated reward is printed each time the environment reports done.
    """
    # Just like gym, alr_envs provides a make function for creating environments.
    # It handles the seeding and gives access to a variety of external environments
    # through the gym interface.
    env = alr_envs.make(env_name, seed)

    episode_return = 0
    # env.render(mode=None)
    env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for trajectory_idx in range(iterations):
        # render(mode="human") shows the full MP trajectory. Calling it once at the
        # beginning is enough to render every consecutive trajectory, and
        # render(mode=None) switches rendering off again. The mode may be changed
        # as often as needed, e.g. to display only every second trajectory as done
        # here. Just make sure the correct mode is set before executing the step.
        show_trajectory = render and trajectory_idx % 2 == 0
        env.render(mode="human" if show_trajectory else None)

        # The action space is no longer the raw action but the parametrization of
        # the trajectory generator, such as a ProMP. It is used the same way, though.
        params = env.action_space.sample()
        # This executes a full trajectory; the reward is aggregated over it.
        _, reward, done, _ = env.step(params)
        episode_return += reward

        if not done:
            continue

        print(episode_return)
        episode_return = 0
        env.reset()
|
|
|
|
|
|
|
|
|
2022-07-06 09:05:35 +02:00
|
|
|
def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render=True):
    """
    Roll out a registered motion primitive based environment with altered arguments.

    The environment is created with a customised basis generator
    (``num_basis`` set to 1000) to show how keyword arguments are forwarded.

    Args:
        env_name: env_id of the registered environment to create
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps (full trajectories) to run
        render: Render the episode

    Returns:
        The most recent observation: the one from the last step, or the one
        from ``env.reset()`` if that step finished the episode.
    """
    # Arguments of the black box env can be changed by passing them on as kwargs,
    # the same way as with gym. E.g. here for adding a lot of basis functions.
    env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000})
    # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}})
    # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}})

    episode_return = 0
    obs = env.reset()

    # This time rendering every trajectory
    if render:
        env.render(mode="human")

    # number of samples/full trajectories (multiple environment steps)
    for trajectory_idx in range(iterations):
        obs, reward, done, _ = env.step(env.action_space.sample())
        episode_return += reward

        if done:
            print(trajectory_idx, episode_return)
            episode_return = 0
            obs = env.reset()

    return obs
|
2021-06-28 17:25:53 +02:00
|
|
|
|
|
|
|
|
2021-07-02 13:09:56 +02:00
|
|
|
def example_fully_custom_mp(seed=1, iterations=1, render=True):
    """
    Build a custom motion primitive based environment by hand and roll it out.

    Our already registered environments follow the same structure, so this also
    allows adjusting the hyperparameters of the motion primitives. Still, if you
    are only interested in changing those parameters for existing tasks, we
    recommend passing kwargs to ``make`` for a registered environment instead.
    We appreciate PRs for custom environments (especially MP wrappers of existing
    tasks) for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed
        iterations: Number of rollout steps (full trajectories) to run
        render: Render the episode

    Returns:
        None. The aggregated reward is printed each time the environment reports done.
    """
    base_env = "alr_envs:HoleReacher-v1"

    # Replace this wrapper with the custom wrapper for your environment by
    # inheriting from the RawInterfaceWrapper.
    # Other gym.Wrappers can be added to the list as well in case they are needed.
    wrappers = [alr_envs.alr.classic_control.hole_reacher.MPWrapper]

    # Hyperparameters handed to the motion primitive.
    mp_kwargs = dict(
        num_dof=5,
        num_basis=5,
        duration=2,
        learn_goal=True,
        alpha_phase=2,
        bandwidth_factor=2,
        policy_type="velocity",
        weights_scale=50,
        goal_scale=0.1,
    )
    env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)
    # OR for a deterministic ProMP:
    # env = make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=traj_gen_kwargs)

    if render:
        env.render(mode="human")

    episode_return = 0
    env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for _ in range(iterations):
        _, reward, done, _ = env.step(env.action_space.sample())
        episode_return += reward

        if not done:
            continue

        print(episode_return)
        episode_return = 0
        env.reset()
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Rendering is switched off for all examples started from here.
    render = False

    # Further registered environments that can be run the same way:
    # # DMP
    # example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render)
    #
    # # ProMP
    # example_mp("alr_envs:HoleReacherProMP-v1", seed=10, iterations=1, render=render)
    #
    # # DetProMP
    # example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render)

    # Altered basis functions
    obs1 = example_custom_mp(
        "dmc:manipulation-stack_2_bricks_features",
        seed=10,
        iterations=250,
        render=render,
    )

    # Custom MP
    # example_fully_custom_mp(seed=10, iterations=1, render=render)
|