metaworld examples
This commit is contained in:
parent
c39877ece0
commit
4f18a529b6
128
alr_envs/examples/examples_metaworld.py
Normal file
128
alr_envs/examples/examples_metaworld.py
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
import alr_envs
|
||||||
|
from alr_envs.meta.goal_and_object_change import MPWrapper
|
||||||
|
|
||||||
|
|
||||||
|
def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True):
    """
    Run a MetaWorld based env in the step based setting using random actions.

    The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always
    return the observable goal version.
    All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/

    NOTE(review): the default `env_id` ("fish-swim") does not follow the `task_name-v2` pattern
    described above -- confirm whether the default is intended; pass the id explicitly to be safe.

    Args:
        env_id: `task_name-v2`
        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
        iterations: Number of rollout steps to run
        render: Render the episode

    Returns:
        None. The accumulated reward is printed every time an episode finishes.

    """
    env = alr_envs.make(env_id, seed)

    # Release the environment even if reset/step/render raises half way through the rollout.
    try:
        rewards = 0
        env.reset()
        print("observation shape:", env.observation_space.shape)
        print("action shape:", env.action_space.shape)

        for _ in range(iterations):
            ac = env.action_space.sample()
            _, reward, done, _ = env.step(ac)
            rewards += reward

            if render:
                # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM
                # TODO: Remove this, when Metaworld fixes its interface.
                env.render(False)

            if done:
                # Report the return of the finished episode and start a new one.
                print(env_id, rewards)
                rewards = 0
                env.reset()
    finally:
        env.close()
|
||||||
|
|
||||||
|
|
||||||
|
def example_custom_dmc_and_mp(seed=1, iterations=1, render=True):
    """
    Example for running a custom motion primitive based environment.

    Our already registered environments follow the same structure.
    Hence, this also allows to adjust hyperparameters of the motion primitives.
    Yet, we recommend the method above if you are just interested in changing those parameters for existing tasks.
    We appreciate PRs for custom environments (especially MP wrappers of existing tasks)
    for our repo: https://github.com/ALRhub/alr_envs/

    Args:
        seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code)
        iterations: Number of rollout steps to run
        render: Render the episode (TODO: currently not working due to an issue in MetaWorld code)

    Returns:
        None. The accumulated reward is printed every time an episode finishes.

    Raises:
        ValueError: if `render` is truthy, because rendering is currently not possible here.

    """
    # Rendering the full MP trajectory would only require calling render() once in the beginning,
    # which renders every consecutive trajectory. Resetting to no rendering can be achieved by
    # render(mode=None). It is also possible to change the mode multiple times when e.g. only every
    # nth trajectory should be displayed.
    # The check is done before the environment is created, so nothing is left open when it fails.
    if render:
        # TODO: Remove this, when Metaworld fixes its interface, and call env.render(mode="human")
        #  right after the environment has been created instead.
        raise ValueError("The Metaworld render interface bug does not allow to call render() "
                         "until Metaworld fixes its interface. "
                         "A temporary workaround is to alter their code in MujocoEnv render() from "
                         "`if not offscreen` to `if not offscreen or offscreen == 'human'`.")

    # Base MetaWorld name, according to structure of above example
    base_env = "button-press-v2"

    # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
    # You can also add other gym.Wrappers in case they are needed.
    wrappers = [MPWrapper]
    mp_kwargs = {
        "num_dof": 4,
        "num_basis": 5,
        "duration": 6.25,
        "post_traj_time": 0,
        "width": 0.025,
        "zero_start": True,
        "policy_type": "metaworld",
    }

    # For a DMP use alr_envs.make_dmp_env(...) with the same arguments instead.
    env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs)

    # Release the environment even if reset/step raises half way through the rollout.
    try:
        rewards = 0
        env.reset()

        # number of samples/full trajectories (multiple environment steps)
        for _ in range(iterations):
            ac = env.action_space.sample()
            _, reward, done, _ = env.step(ac)
            rewards += reward

            if done:
                # Report the return of the finished trajectory and start a new one.
                print(base_env, rewards)
                rewards = 0
                env.reset()
    finally:
        env.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Disclaimer: MetaWorld environments require the seed to be specified in the beginning.
    # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior.

    # For rendering it might be necessary to specify your OpenGL installation
    # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so
    render = False

    # Step based MetaWorld task
    example_dmc(env_id="button-press-v2", seed=10, iterations=500, render=render)

    # MP + MetaWorld hybrid task provided in our framework
    example_dmc(env_id="ButtonPressDetPMP-v2", seed=10, iterations=1, render=render)

    # Custom MetaWorld task
    example_custom_dmc_and_mp(seed=10, iterations=1, render=render)
|
Loading…
Reference in New Issue
Block a user