2021-07-30 11:59:02 +02:00
|
|
|
import alr_envs
|
2021-06-28 17:25:53 +02:00
|
|
|
|
|
|
|
|
2022-07-12 17:24:12 +02:00
|
|
|
def example_dmc(env_id="dmc:fish-swim", seed=1, iterations=1000, render=True):
    """
    Example for running a DMC based env in the step based setting.

    A random action is sampled from the action space in every step. Whenever an
    episode finishes, its accumulated reward is printed and the env is reset.

    Args:
        env_id: Id of the environment to create, e.g. `dmc:fish-swim`
            (`dmc:domain_name-task_name`) or, for manipulation tasks,
            `dmc:manipulation-reach_site_features`
            (`dmc:manipulation-environment_name`).
        seed: seed for deterministic behaviour
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:
        None. The env id and the accumulated reward of each finished episode
        are printed.
    """
    env = alr_envs.make(env_id, seed)

    # Close the environment even if reset/render/step raises (or the run is
    # interrupted), so the env is not left open on the error path.
    try:
        rewards = 0
        obs = env.reset()
        print("observation shape:", env.observation_space.shape)
        print("action shape:", env.action_space.shape)

        for _ in range(iterations):
            ac = env.action_space.sample()
            if render:
                env.render(mode="human")
            obs, reward, done, info = env.step(ac)
            rewards += reward

            if done:
                print(env_id, rewards)
                rewards = 0
                obs = env.reset()
    finally:
        env.close()

    # Explicitly drop the reference to the closed env.
    # NOTE(review): presumably this helps releasing simulator/renderer resources
    # before another example creates a new env -- confirm.
    del env
|
2021-06-30 15:00:36 +02:00
|
|
|
|
2021-06-28 17:25:53 +02:00
|
|
|
|
2021-07-02 13:09:56 +02:00
|
|
|
def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
    """
    Example for running a custom motion primitive based environments.
    Our already registered environments follow the same structure.
    Hence, this also allows to adjust hyperparameters of the motion primitives.
    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed for deterministic behaviour
        iterations: Number of samples/full trajectories to run. Each call to
            `env.step` covers multiple steps of the underlying environment.
        render: Render the episode

    Returns:
        None. The base env id and the accumulated reward of each finished
        episode are printed.
    """

    # Base DMC name, according to structure of above example
    base_env_id = "dmc:ball_in_cup-catch"

    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
    # You can also add other gym.Wrappers in case they are needed.
    wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]

    # For a ProMP
    trajectory_generator_kwargs = {'trajectory_generator_type': 'promp'}
    phase_generator_kwargs = {'phase_generator_type': 'linear'}
    controller_kwargs = {
        'controller_type': 'motor',
        "p_gains": 1.0,
        "d_gains": 0.1,
    }
    basis_generator_kwargs = {
        'basis_generator_type': 'zero_rbf',
        'num_basis': 5,
        'num_basis_zero_start': 1,
    }

    # For a DMP
    # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp'}
    # phase_generator_kwargs = {'phase_generator_type': 'exp',
    #                           'alpha_phase': 2}
    # controller_kwargs = {'controller_type': 'motor',
    #                      "p_gains": 1.0,
    #                      "d_gains": 0.1,
    #                      }
    # basis_generator_kwargs = {'basis_generator_type': 'rbf',
    #                           'num_basis': 5
    #                           }
    env = alr_envs.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
                           traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
                           phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
                           seed=seed)

    # Close the environment even if render/reset/step raises (or the run is
    # interrupted), so the env is not left open on the error path.
    try:
        # This renders the full MP trajectory
        # It is only required to call render() once in the beginning, which renders every consecutive trajectory.
        # Resetting to no rendering, can be achieved by render(mode=None).
        # It is also possible to change the mode multiple times when
        # e.g. only every nth trajectory should be displayed.
        if render:
            env.render(mode="human")

        rewards = 0
        obs = env.reset()

        # number of samples/full trajectories (multiple environment steps)
        for _ in range(iterations):
            ac = env.action_space.sample()
            obs, reward, done, info = env.step(ac)
            rewards += reward

            if done:
                print(base_env_id, rewards)
                rewards = 0
                obs = env.reset()
    finally:
        env.close()

    # Explicitly drop the reference to the closed env.
    # NOTE(review): presumably this helps releasing simulator/renderer resources
    # before another env is created -- confirm.
    del env
|
2021-06-30 15:00:36 +02:00
|
|
|
|
2021-06-28 17:25:53 +02:00
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Disclaimer: DMC environments require the seed to be specified in the beginning.
    # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics.

    # For rendering DMC
    # export MUJOCO_GL="osmesa"
    render = True

    # Standard DMC Suite task
    example_dmc(env_id="dmc:fish-swim", seed=10, iterations=1000, render=render)

    # Manipulation task
    # Disclaimer: The vision versions are currently not integrated and yield an error
    example_dmc(env_id="dmc:manipulation-reach_site_features", seed=10, iterations=250, render=render)

    # Gym + DMC hybrid task provided in the MP framework
    example_dmc(env_id="dmc_ball_in_cup-catch_promp-v0", seed=10, iterations=1, render=render)

    # Custom DMC task
    # A different seed is used here, because the episode is longer for this example
    # and the name+seed combo is already registered above.
    example_custom_dmc_and_mp(seed=11, iterations=1, render=render)
|