Docs: Show usage using new api functionality

This commit is contained in:
Dominik Moritz Roth 2023-10-29 13:25:35 +01:00
parent 668765b145
commit f1d3b5333f

View File

@ -45,10 +45,14 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True): def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True):
""" """
Example for running a movement primitive based environment, which is already registered Example for running a custom movement-primitive-based environment.
Our already registered environments follow the same structure.
Hence, this also allows adjusting the hyperparameters of the movement primitives.
Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
for our repo: https://github.com/ALRhub/fancy_gym/
Args: Args:
env_name: DMP env_id seed: seed
seed: seed for deterministic behaviour
iterations: Number of rollout steps to run iterations: Number of rollout steps to run
render: Render the episode render: Render the episode
@ -78,6 +82,44 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1,
return obs return obs
class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper):
    """
    Reacher MP wrapper that carries its own movement primitive configuration.

    The class only overrides ``mp_config``; everything else is inherited from
    the stock Reacher ``MPWrapper``. ``mp_config`` holds one entry per movement
    primitive type ('ProMP' and 'DMP'), each made up of four keyword-argument
    groups: trajectory generator, phase generator, controller and basis
    generator.
    """

    mp_config = {
        # Settings for the 'ProMP' entry.
        'ProMP': {
            'trajectory_generator_kwargs': {
                'trajectory_generator_type': 'promp',
                'weights_scale': 2,
            },
            'phase_generator_kwargs': {
                'phase_generator_type': 'linear',
            },
            'controller_kwargs': {
                'controller_type': 'velocity',
            },
            'basis_generator_kwargs': {
                'basis_generator_type': 'zero_rbf',
                'num_basis': 5,
                'num_basis_zero_start': 1,
            },
        },
        # Settings for the 'DMP' entry.
        'DMP': {
            'trajectory_generator_kwargs': {
                'trajectory_generator_type': 'dmp',
                'weights_scale': 500,
            },
            'phase_generator_kwargs': {
                'phase_generator_type': 'exp',
                'alpha_phase': 2.5,
            },
            'controller_kwargs': {
                'controller_type': 'velocity',
            },
            'basis_generator_kwargs': {
                'basis_generator_type': 'rbf',
                'num_basis': 5,
            },
        },
    }
def example_fully_custom_mp(seed=1, iterations=1, render=True): def example_fully_custom_mp(seed=1, iterations=1, render=True):
""" """
@ -97,35 +139,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
""" """
base_env_id = "fancy/Reacher5d-v0" base_env_id = "fancy/Reacher5d-v0"
custom_env_id = "fancy/Reacher5d-Custom-v0"
custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0"
custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper. fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id)
# You can also add other gym.Wrappers in case they are needed.
wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper]
# For a ProMP env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
'weights_scale': 2}
phase_generator_kwargs = {'phase_generator_type': 'linear'}
controller_kwargs = {'controller_type': 'velocity'}
basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 1
}
# # For a DMP
# trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
# 'weights_scale': 500}
# phase_generator_kwargs = {'phase_generator_type': 'exp',
# 'alpha_phase': 2.5}
# controller_kwargs = {'controller_type': 'velocity'}
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
# 'num_basis': 5
# }
raw_env = gym.make(base_env_id, render_mode='human' if render else None)
env = fancy_gym.make_bb(env=raw_env, wrappers=wrappers, black_box_kwargs={},
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
seed=seed)
if render: if render:
env.render() env.render()
@ -144,6 +164,75 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
rewards = 0 rewards = 0
obs = env.reset() obs = env.reset()
def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
    """
    Register a custom ProMP environment with an inline MP config and roll it out.

    Instead of defining the mp_args in a new custom MP_Wrapper, they can also be
    provided during registration through the ``mp_config_override`` argument of
    ``fancy_gym.upgrade``.

    Args:
        seed: seed passed to the initial ``env.reset`` call
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:
        None
    """
    base_env_id = "fancy/Reacher5d-v0"
    custom_env_id = "fancy/Reacher5d-Custom-v0"
    custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"

    # Built up front so the registration call below stays readable.
    mp_config_override = {
        'ProMP': {
            'trajectory_generator_kwargs': {
                'trajectory_generator_type': 'promp',
                'weights_scale': 2,
            },
            'phase_generator_kwargs': {
                'phase_generator_type': 'linear',
            },
            'controller_kwargs': {
                'controller_type': 'velocity',
            },
            'basis_generator_kwargs': {
                'basis_generator_type': 'zero_rbf',
                'num_basis': 5,
                'num_basis_zero_start': 1,
            },
        },
    }

    fancy_gym.upgrade(
        custom_env_id,
        mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper,
        add_mp_types=['ProMP'],
        base_id=base_env_id,
        mp_config_override=mp_config_override,
    )

    env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)

    if render:
        env.render()

    rewards = 0
    # Only the first reset is seeded; later resets continue from the env's RNG.
    # NOTE(review): assumes the env follows the Gymnasium reset(seed=...) API,
    # which matches the 5-tuple step() return used below - confirm.
    env.reset(seed=seed)

    # number of samples/full trajectories (multiple environment steps)
    for _ in range(iterations):
        ac = env.action_space.sample()
        _, reward, terminated, truncated, _ = env.step(ac)
        rewards += reward

        if terminated or truncated:
            print(rewards)
            rewards = 0
            env.reset()
def main(): def main():
render = False render = False
@ -165,6 +254,7 @@ def main():
# Custom MP # Custom MP
example_fully_custom_mp(seed=10, iterations=1, render=render) example_fully_custom_mp(seed=10, iterations=1, render=render)
example_fully_custom_mp_alternative(seed=10, iterations=1, render=render)
if __name__=='__main__': if __name__=='__main__':
main() main()