Merge pull request #85 from ALRhub/81_fix_docs_mp
This commit is contained in:
commit
f6b56d5b0f
@ -16,7 +16,7 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
|||||||
"""
|
"""
|
||||||
# Equivalent to gym, we have a make function which can be used to create environments.
|
# Equivalent to gym, we have a make function which can be used to create environments.
|
||||||
# It takes care of seeding and enables the use of a variety of external environments using the gym interface.
|
# It takes care of seeding and enables the use of a variety of external environments using the gym interface.
|
||||||
env = gym.make(env_name)
|
env = gym.make(env_name, render_mode='human' if render else None)
|
||||||
|
|
||||||
returns = 0
|
returns = 0
|
||||||
# env.render(mode=None)
|
# env.render(mode=None)
|
||||||
@ -26,14 +26,6 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
|||||||
for i in range(iterations):
|
for i in range(iterations):
|
||||||
|
|
||||||
if render and i % 1 == 0:
|
if render and i % 1 == 0:
|
||||||
# This renders the full MP trajectory
|
|
||||||
# It is only required to call render() once in the beginning, which renders every consecutive trajectory.
|
|
||||||
# Resetting to no rendering, can be achieved by render(mode=None).
|
|
||||||
# It is also possible to change the mode multiple times when
|
|
||||||
# e.g. only every second trajectory should be displayed, such as here
|
|
||||||
# Just make sure the correct mode is set before executing the step.
|
|
||||||
env.render(mode="human")
|
|
||||||
else:
|
|
||||||
env.render()
|
env.render()
|
||||||
|
|
||||||
# Now the action space is not the raw action but the parametrization of the trajectory generator,
|
# Now the action space is not the raw action but the parametrization of the trajectory generator,
|
||||||
@ -49,14 +41,19 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend
|
|||||||
if terminated or truncated:
|
if terminated or truncated:
|
||||||
print(reward)
|
print(reward)
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
env.close()
|
||||||
|
|
||||||
|
|
||||||
def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True):
|
def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1, render=True):
|
||||||
"""
|
"""
|
||||||
Example for running a movement primitive based environment, which is already registered
|
Example for running a custom movement primitive based environment.
|
||||||
|
Our already registered environments follow the same structure.
|
||||||
|
Hence, this also allows adjusting the hyperparameters of the movement primitives.
|
||||||
|
Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
|
||||||
|
We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
|
||||||
|
for our repo: https://github.com/ALRhub/fancy_gym/
|
||||||
Args:
|
Args:
|
||||||
env_name: DMP env_id
|
seed: seed
|
||||||
seed: seed for deterministic behaviour
|
|
||||||
iterations: Number of rollout steps to run
|
iterations: Number of rollout steps to run
|
||||||
render: Render the episode
|
render: Render the episode
|
||||||
|
|
||||||
@ -65,14 +62,14 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1,
|
|||||||
"""
|
"""
|
||||||
# Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.
|
# Changing the arguments of the black box env is possible by providing them to gym through mp_config_override.
|
||||||
# E.g. here for way too many basis functions
|
# E.g. here for way too many basis functions
|
||||||
env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}})
|
env = gym.make(env_name, seed, mp_config_override={'basis_generator_kwargs': {'num_basis': 1000}}, render_mode='human' if render else None)
|
||||||
|
|
||||||
returns = 0
|
returns = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
# This time rendering every trajectory
|
# This time rendering every trajectory
|
||||||
if render:
|
if render:
|
||||||
env.render(mode="human")
|
env.render()
|
||||||
|
|
||||||
# number of samples/full trajectories (multiple environment steps)
|
# number of samples/full trajectories (multiple environment steps)
|
||||||
for i in range(iterations):
|
for i in range(iterations):
|
||||||
@ -84,8 +81,47 @@ def example_custom_mp(env_name="fancy_ProMP/Reacher5d-v0", seed=1, iterations=1,
|
|||||||
print(i, reward)
|
print(i, reward)
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
|
env.close()
|
||||||
return obs
|
return obs
|
||||||
|
|
||||||
|
class Custom_MPWrapper(fancy_gym.envs.mujoco.reacher.MPWrapper):
|
||||||
|
mp_config = {
|
||||||
|
'ProMP': {
|
||||||
|
'trajectory_generator_kwargs': {
|
||||||
|
'trajectory_generator_type': 'promp',
|
||||||
|
'weights_scale': 2
|
||||||
|
},
|
||||||
|
'phase_generator_kwargs': {
|
||||||
|
'phase_generator_type': 'linear'
|
||||||
|
},
|
||||||
|
'controller_kwargs': {
|
||||||
|
'controller_type': 'velocity'
|
||||||
|
},
|
||||||
|
'basis_generator_kwargs': {
|
||||||
|
'basis_generator_type': 'zero_rbf',
|
||||||
|
'num_basis': 5,
|
||||||
|
'num_basis_zero_start': 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'DMP': {
|
||||||
|
'trajectory_generator_kwargs': {
|
||||||
|
'trajectory_generator_type': 'dmp',
|
||||||
|
'weights_scale': 500
|
||||||
|
},
|
||||||
|
'phase_generator_kwargs': {
|
||||||
|
'phase_generator_type': 'exp',
|
||||||
|
'alpha_phase': 2.5
|
||||||
|
},
|
||||||
|
'controller_kwargs': {
|
||||||
|
'controller_type': 'velocity'
|
||||||
|
},
|
||||||
|
'basis_generator_kwargs': {
|
||||||
|
'basis_generator_type': 'rbf',
|
||||||
|
'num_basis': 5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
||||||
"""
|
"""
|
||||||
@ -105,37 +141,92 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
base_env_id = "fancy/Reacher5d-v0"
|
base_env_id = "fancy/Reacher5d-v0"
|
||||||
|
custom_env_id = "fancy/Reacher5d-Custom-v0"
|
||||||
|
custom_env_id_DMP = "fancy_DMP/Reacher5d-Custom-v0"
|
||||||
|
custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
|
||||||
|
|
||||||
# Replace this wrapper with the custom wrapper for your environment by inheriting from the RawInterfaceWrapper.
|
fancy_gym.upgrade(custom_env_id, mp_wrapper=Custom_MPWrapper, add_mp_types=['ProMP', 'DMP'], base_id=base_env_id)
|
||||||
# You can also add other gym.Wrappers in case they are needed.
|
|
||||||
wrappers = [fancy_gym.envs.mujoco.reacher.MPWrapper]
|
|
||||||
|
|
||||||
# For a ProMP
|
env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
|
||||||
trajectory_generator_kwargs = {'trajectory_generator_type': 'promp',
|
|
||||||
'weights_scale': 2}
|
|
||||||
phase_generator_kwargs = {'phase_generator_type': 'linear'}
|
|
||||||
controller_kwargs = {'controller_type': 'velocity'}
|
|
||||||
basis_generator_kwargs = {'basis_generator_type': 'zero_rbf',
|
|
||||||
'num_basis': 5,
|
|
||||||
'num_basis_zero_start': 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# # For a DMP
|
rewards = 0
|
||||||
# trajectory_generator_kwargs = {'trajectory_generator_type': 'dmp',
|
obs = env.reset()
|
||||||
# 'weights_scale': 500}
|
|
||||||
# phase_generator_kwargs = {'phase_generator_type': 'exp',
|
|
||||||
# 'alpha_phase': 2.5}
|
|
||||||
# controller_kwargs = {'controller_type': 'velocity'}
|
|
||||||
# basis_generator_kwargs = {'basis_generator_type': 'rbf',
|
|
||||||
# 'num_basis': 5
|
|
||||||
# }
|
|
||||||
env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={},
|
|
||||||
traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs,
|
|
||||||
phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs,
|
|
||||||
seed=seed)
|
|
||||||
|
|
||||||
if render:
|
if render:
|
||||||
env.render(mode="human")
|
env.render()
|
||||||
|
|
||||||
|
# number of samples/full trajectories (multiple environment steps)
|
||||||
|
for i in range(iterations):
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, reward, terminated, truncated, info = env.step(ac)
|
||||||
|
rewards += reward
|
||||||
|
|
||||||
|
if terminated or truncated:
|
||||||
|
print(rewards)
|
||||||
|
rewards = 0
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
try: # Some mujoco-based envs don't correctly implement .close
|
||||||
|
env.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True):
|
||||||
|
"""
|
||||||
|
Instead of defining the mp_args in a new custom MP_Wrapper, they can also be provided during registration.
|
||||||
|
Args:
|
||||||
|
seed: seed
|
||||||
|
iterations: Number of rollout steps to run
|
||||||
|
render: Render the episode
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
base_env_id = "fancy/Reacher5d-v0"
|
||||||
|
custom_env_id = "fancy/Reacher5d-Custom-v0"
|
||||||
|
custom_env_id_ProMP = "fancy_ProMP/Reacher5d-Custom-v0"
|
||||||
|
|
||||||
|
fancy_gym.upgrade(custom_env_id, mp_wrapper=fancy_gym.envs.mujoco.reacher.MPWrapper, add_mp_types=['ProMP'], base_id=base_env_id, mp_config_override= {'ProMP': {
|
||||||
|
'trajectory_generator_kwargs': {
|
||||||
|
'trajectory_generator_type': 'promp',
|
||||||
|
'weights_scale': 2
|
||||||
|
},
|
||||||
|
'phase_generator_kwargs': {
|
||||||
|
'phase_generator_type': 'linear'
|
||||||
|
},
|
||||||
|
'controller_kwargs': {
|
||||||
|
'controller_type': 'velocity'
|
||||||
|
},
|
||||||
|
'basis_generator_kwargs': {
|
||||||
|
'basis_generator_type': 'zero_rbf',
|
||||||
|
'num_basis': 5,
|
||||||
|
'num_basis_zero_start': 1
|
||||||
|
}
|
||||||
|
}})
|
||||||
|
|
||||||
|
env = gym.make(custom_env_id_ProMP, render_mode='human' if render else None)
|
||||||
|
|
||||||
|
rewards = 0
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
if render:
|
||||||
|
env.render()
|
||||||
|
|
||||||
|
# number of samples/full trajectories (multiple environment steps)
|
||||||
|
for i in range(iterations):
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, reward, terminated, truncated, info = env.step(ac)
|
||||||
|
rewards += reward
|
||||||
|
|
||||||
|
if terminated or truncated:
|
||||||
|
print(rewards)
|
||||||
|
rewards = 0
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
if render:
|
||||||
|
env.render()
|
||||||
|
|
||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
@ -151,8 +242,13 @@ def example_fully_custom_mp(seed=1, iterations=1, render=True):
|
|||||||
rewards = 0
|
rewards = 0
|
||||||
obs = env.reset()
|
obs = env.reset()
|
||||||
|
|
||||||
|
try: # Some mujoco-based envs don't correctly implement .close
|
||||||
|
env.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
def main():
|
||||||
render = False
|
render = False
|
||||||
# DMP
|
# DMP
|
||||||
example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)
|
example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render)
|
||||||
@ -172,3 +268,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# Custom MP
|
# Custom MP
|
||||||
example_fully_custom_mp(seed=10, iterations=1, render=render)
|
example_fully_custom_mp(seed=10, iterations=1, render=render)
|
||||||
|
example_fully_custom_mp_alternative(seed=10, iterations=1, render=render)
|
||||||
|
|
||||||
|
if __name__=='__main__':
|
||||||
|
main()
|
||||||
|
Loading…
Reference in New Issue
Block a user