Docs: Show usage using new api functionality
commit f1d3b5333f (parent 668765b145)
@@ -45,10 +45,14 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
 def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True):
     """
-    Example for running a movement primitive based environment, which is already registered
+    Example for running a custom movement primitive based environment.
+    Our already registered environments follow the same structure.
+    Hence, this also allows adjusting the hyperparameters of the movement primitives.
+    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
+    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
+    for our repo: https://github.com/ALRhub/fancy_gym/
     Args:
         env_name: DMP env_id
-        seed: seed for deterministic behaviour
         seed: seed
         iterations: Number of rollout steps to run
         render: Render the episode
 
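As context for this docstring, a rollout of an already registered MP variant (which, per the text above, follows the same structure) looks roughly like the minimal sketch below. It assumes the gymnasium-based API used throughout this example file; fancy_gym registers its environment ids on import, and a single env.step executes one full movement-primitive trajectory:

    import gymnasium as gym
    import fancy_gym  # importing fancy_gym registers the fancy_*/ environment ids

    env = gym.make("fancy_ProMP/Reacher5d-v0")
    obs, info = env.reset(seed=1)
    ac = env.action_space.sample()  # one action corresponds to one set of ProMP parameters
    obs, reward, terminated, truncated, info = env.step(ac)  # rolls out the whole trajectory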
@@ -78,6 +82,44 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1,
     return obs
 
+
+class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper):
+    mp_config = {
+        'ProMP': {
+            'trajectory_generator_kwargs': {
+                'trajectory_generator_type': 'promp',
+                'weights_scale': 2
+            },
+            'phase_generator_kwargs': {
+                'phase_generator_type': 'linear'
+            },
+            'controller_kwargs': {
+                'controller_type': 'velocity'
+            },
+            'basis_generator_kwargs': {
+                'basis_generator_type': 'zero_rbf',
+                'num_basis': 5,
+                'num_basis_zero_start': 1
+            }
+        },
+        'DMP': {
+            'trajectory_generator_kwargs': {
+                'trajectory_generator_type': 'dmp',
+                'weights_scale': 500
+            },
+            'phase_generator_kwargs': {
+                'phase_generator_type': 'exp',
+                'alpha_phase': 2.5
+            },
+            'controller_kwargs': {
+                'controller_type': 'velocity'
+            },
+            'basis_generator_kwargs': {
+                'basis_generator_type': 'rbf',
+                'num_basis': 5
+            }
+        }
+    }
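The 'ProMP' and 'DMP' keys of mp_config above line up with the fancy_ProMP/ and fancy_DMP/ id namespaces under which the wrapper is registered further down via fancy_gym.upgrade. A minimal sketch of using both variants once that registration has run; the ids are the ones defined below, and gym is assumed to be the gymnasium import this file already uses:

    # assumes fancy_gym.upgrade(..., add_mp_types=['ProMP', 'DMP'], ...) from the next hunk was called
    env_promp = gym.make("fancy_ProMP/Reacher5d-Custom-v0")  # built from the 'ProMP' block of mp_config
    env_dmp = gym.make("fancy_DMP/Reacher5d-Custom-v0")      # built from the 'DMP' block of mp_config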
 
 
 def example_fully_custom_mp(seed=1, iterations=1, render=True):
     """
@@ -97,35 +139,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
     """
 
     base_env_id = "fancy/Reacher5d-v0"
     custom_env_id = "fancy/Reacher5d-Custom-v0"
+    custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0"
+    custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
 
-    # Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
-    # You can also add other gym.Wrappers in case they are needed.
-    wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper]
+    fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id)
 
-    # For a ProMP
-    trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
-                                   'weights_scale': 2}
-    phase_generator_kwargs = {'phase_generator_type': 'linear'}
-    controller_kwargs = {'controller_type': 'velocity'}
-    basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
-                              'num_basis': 5,
-                              'num_basis_zero_start': 1
-                              }
-
-    # # For a DMP
-    # trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
-    #                                'weights_scale': 500}
-    # phase_generator_kwargs = {'phase_generator_type': 'exp',
-    #                           'alpha_phase': 2.5}
-    # controller_kwargs = {'controller_type': 'velocity'}
-    # basis_generator_kwargs = {'basis_generator_type': 'rbf',
-    #                           'num_basis': 5
-    #                           }
-    raw_env = gym.make(base_env_id, render_mode='human' if render else None)
-    env = fancy_gym.make_bb(env=raw_env, wrappers=wrappers, black_box_kwargs={},
-                            traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
-                            phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
-                            seed=seed)
+    env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
 
     if render:
         env.render()
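Design note: compared to the removed make_bb construction, the MP settings now live on Custom_MPWrapper.mp_config, and gym.make on the fancy_ProMP/ or fancy_DMP/ id assembles the trajectory generator, phase generator, controller and basis generator from them, so the example no longer passes traj_gen_kwargs, controller_kwargs, phase_kwargs and basis_kwargs by hand.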
@@ -144,6 +164,75 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
             rewards = 0
             obs = env.reset()
 
+def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
+    """
+    Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.
+    Args:
+        seed: seed
+        iterations: Number of rollout steps to run
+        render: Render the episode
+
+    Returns:
+
+    """
+
+    base_env_id = "fancy/Reacher5d-v0"
+    custom_env_id = "fancy/Reacher5d-Custom-v0"
+    custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
+
+    fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override={'ProMP': {
+        'trajectory_generator_kwargs': {
+            'trajectory_generator_type': 'promp',
+            'weights_scale': 2
+        },
+        'phase_generator_kwargs': {
+            'phase_generator_type': 'linear'
+        },
+        'controller_kwargs': {
+            'controller_type': 'velocity'
+        },
+        'basis_generator_kwargs': {
+            'basis_generator_type': 'zero_rbf',
+            'num_basis': 5,
+            'num_basis_zero_start': 1
+        }
+    }})
+
+    env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
+
+    if render:
+        env.render()
+
+    rewards = 0
+    obs = env.reset()
+
+    # number of samples/full trajectories (multiple environment steps)
+    for i in range(iterations):
+        ac = env.action_space.sample()
+        obs, reward, terminated, truncated, info = env.step(ac)
+        rewards += reward
+
+        if terminated or truncated:
+            print(rewards)
+            rewards = 0
+            obs = env.reset()
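The mp_config_override argument above passes a complete 'ProMP' block inline. If only one hyperparameter should differ from the defaults, a smaller override of the same shape may be enough; this is a hedged sketch that assumes keys left out of the override fall back to the wrapper/namespace defaults (if that assumption does not hold, pass the full block exactly as the function above does):

    # hypothetical partial override; custom_env_id and base_env_id as defined in the function above
    fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper,
                      add_mp_types=['ProMP'], base_id=base_env_id,
                      mp_config_override={'ProMP': {'trajectory_generator_kwargs': {'weights_scale': 2}}})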
 
 def main():
     render = False
@@ -165,6 +254,7 @@ def main():
 
     # Custom MP
     example_fully_custom_mp(seed=10, iterations=1, render=render)
+    example_fully_custom_mp_alternative(seed=10, iterations=1, render=render)
 
 if __name__=='__main__':
     main()
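Both registration paths shown in this file end up in the same place: once either example function has run in the current process, the custom ids behave like any other registered environment. A short follow-up sketch (assuming one of the examples above has already executed, since the upgrade call performs the registration):

    import gymnasium as gym

    env = gym.make("fancy_ProMP/Reacher5d-Custom-v0")
    print(env.action_space)  # the ProMP parameter space exposed by the black-box wrapper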