renaming to fancy_gym

commit 8d1c1b44bf (parent d412fb229c)

README.md: 49 changed lines
@@ -1,13 +1,12 @@
## ALR Robotics Control Environments
## Fancy Gym

This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
We provide support (under the OpenAI interface) for the benchmark suites
Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
(DMC) and [Metaworld](https://meta-world.github.io/).
Custom (Mujoco) gym environments can be created according
to [this guide](https://www.gymlibrary.ml/content/environment_creation/).
Unlike existing libraries, we additionally support to control agents with movement primitives, such as
Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP, we only consider the mean usually).
(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
to [this guide](https://www.gymlibrary.ml/content/environment_creation/). Unlike existing libraries, we additionally
support to control agents with movement primitives, such as Dynamic Movement Primitives (DMPs) and Probabilistic
Movement Primitives (ProMP, we only consider the mean usually).

## Movement Primitive Environments (Episode-Based/Black-Box Environments)
@@ -59,14 +58,14 @@ pip install -e .

## Using the framework

We prepared [multiple examples](alr_envs/examples/), please have a look there for more specific examples.
We prepared [multiple examples](fancy_gym/examples/), please have a look there for more specific examples.

### Step-wise environments

```python
import alr_envs
import fancy_gym

env = alr_envs.make('HoleReacher-v0', seed=1)
env = fancy_gym.make('HoleReacher-v0', seed=1)
state = env.reset()

for i in range(1000):
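The step-based snippet above is cut off at the hunk boundary. A minimal sketch of how such a rollout typically continues, assuming a random placeholder policy and the pre-0.26 gym step API (four return values) that this code base uses:

```python
import fancy_gym

env = fancy_gym.make('HoleReacher-v0', seed=1)
state = env.reset()

for i in range(1000):
    action = env.action_space.sample()            # stand-in for a learned policy
    state, reward, done, info = env.step(action)  # old gym API: obs, reward, done, info
    env.render()
    if done:
        state = env.reset()
```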
@@ -85,9 +84,9 @@ Existing MP tasks can be created the same way as above. Just keep in mind, calli
trajectory.

```python
import alr_envs
import fancy_gym

env = alr_envs.make('HoleReacherProMP-v0', seed=1)
env = fancy_gym.make('HoleReacherProMP-v0', seed=1)
# render() can be called once in the beginning with all necessary arguments. To turn it of again just call render(None).
env.render()
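Since this snippet is again truncated, here is a hedged sketch of a complete movement-primitive rollout. The key point from the text above is that one call to `step()` consumes a full parameter vector and executes the whole trajectory:

```python
import fancy_gym

env = fancy_gym.make('HoleReacherProMP-v0', seed=1)
# Configure rendering once; it is reused for every rollout.
env.render()

obs = env.reset()
for i in range(5):
    params = env.action_space.sample()                    # one full set of ProMP parameters
    obs, episode_return, done, info = env.step(params)    # executes the entire trajectory
    obs = env.reset()
```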
@@ -104,19 +103,19 @@ To show all available environments, we provide some additional convenience. Each
keys `DMP` and `ProMP` that store a list of available environment names.

```python
import alr_envs
import fancy_gym

print("Custom MP tasks:")
print(alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

print("OpenAI Gym MP tasks:")
print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

print("Deepmind Control MP tasks:")
print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)

print("MetaWorld MP tasks:")
print(alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
```
### How to create a new MP task

@@ -181,12 +180,12 @@ class MPWrapper(MPEnvWrapper):

```

If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well.
Without the integration the task can still be used. A rough outline can be shown here, for more details we recommend
having a look at the [examples](alr_envs/examples/).
If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well. Without the
integration the task can still be used. A rough outline can be shown here, for more details we recommend having a look
at the [examples](fancy_gym/examples/).

```python
import alr_envs
import fancy_gym

# Base environment name, according to structure of above example
base_env_id = "ball_in_cup-catch"
@@ -194,12 +193,12 @@ base_env_id = "ball_in_cup-catch"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed,
# e.g. gym.wrappers.FlattenObservation for dict observations
wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
wrappers = [fancy_gym.dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {...}
kwargs = {...}
env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
env = fancy_gym.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
# OR for a deterministic ProMP (other traj_gen_kwargs are required):
# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
# env = fancy_gym.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)

rewards = 0
obs = env.reset()
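For orientation, the task-specific wrappers renamed throughout this commit all follow the same pattern: a thin subclass that tells the black-box layer which observation entries form the context and how to read the current joint state. A minimal sketch; `current_pos` and `current_vel` appear in the wrapper code in this diff, while `context_mask` and the accessors below are assumptions about the abstract interface, not taken from the diff:

```python
from typing import Tuple, Union

import numpy as np

from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper


class MPWrapper(RawInterfaceWrapper):

    @property
    def context_mask(self) -> np.ndarray:
        # Hypothetical: only the last two observation entries (e.g. the goal) are context.
        return np.hstack([[False] * 5, [True] * 2])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        # Hypothetical accessor; a real wrapper would read the joint positions
        # from the underlying simulation (e.g. qpos for MuJoCo-based tasks).
        return self.env.unwrapped.data.qpos[:5].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        return self.env.unwrapped.data.qvel[:5].copy()
```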
@@ -1,14 +1,13 @@
from alr_envs import dmc, meta, open_ai
from alr_envs.utils.make_env_helpers import make, make_bb, make_rank

from fancy_gym import dmc, meta, open_ai
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
# Convenience function for all MP environments
from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS

ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
    key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
         ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
         ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
         ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
    for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
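The comprehension above merges the per-suite dictionaries, so every suite contributes its `DMP` and `ProMP` lists to one combined lookup. A short usage sketch (nothing here is part of the diff itself):

```python
import fancy_gym

for mp_type, env_ids in fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
    print(f"{mp_type}: {len(env_ids)} registered environments")
    if env_ids:
        # Instantiate the first environment of each type as a smoke test.
        env = fancy_gym.make(env_ids[0], seed=0)
        env.reset()
```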
@@ -1,4 +1,5 @@
import os

os.environ["MUJOCO_GL"] = "egl"

from typing import Tuple, Optional
@@ -8,9 +9,9 @@ import numpy as np
from gym import spaces
from mp_pytorch.mp.mp_interfaces import MPInterface

from alr_envs.black_box.controller.base_controller import BaseController
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import get_numpy
from fancy_gym.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.utils.utils import get_numpy


class BlackBoxWrapper(gym.ObservationWrapper):
@@ -77,12 +78,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
    def get_trajectory(self, action: np.ndarray) -> Tuple:
        clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
        self.traj_gen.set_params(clipped_params)
        # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
        # TODO Check with Bruce for replanning
        # TODO: is this correct for replanning? Do we need to adjust anything here?
        self.traj_gen.set_boundary_conditions(
            bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
            bc_pos=self.current_pos, bc_vel=self.current_vel)
        # TODO: is this correct for replanning? Do we need to adjust anything here?
        self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]),
                                   np.array([self.dt]))
        traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
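To make the boundary-condition handling above concrete: without replanning every trajectory is generated from time zero, while with replanning it is conditioned on the point in time the episode has already reached. A small numeric sketch (values are illustrative):

```python
import numpy as np

dt = 0.02                 # illustrative control time step
current_traj_steps = 50   # steps already executed in the current episode

bc_time_without_replanning = np.array(0)
bc_time_with_replanning = np.array([current_traj_steps * dt])  # -> [1.0] second into the episode
```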
@@ -99,9 +98,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):

    def _get_action_space(self):
        """
        This function can be used to modify the action space for considering actions which are not learned via motion
        This function can be used to modify the action space for considering actions which are not learned via movement
        primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
        motion primitive.
        movement primitive.
        Only needs to be overwritten if the action space needs to be modified.
        """
        try:
@@ -138,7 +137,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
        for t, (pos, vel) in enumerate(zip(trajectory, velocity)):
            step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
            c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
            # print('step/clipped action ratio: ', step_action/c_action)
            obs, c_reward, done, info = self.env.step(c_action)
            rewards[t] = c_reward
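The per-step actions in this loop come from the tracking controller; for the PD controller renamed further down, the control law is plain proportional-derivative feedback towards the desired trajectory point. A minimal sketch (gain values are illustrative, not the library defaults):

```python
import numpy as np

p_gains = np.array([1.0])   # illustrative proportional gains
d_gains = np.array([0.1])   # illustrative derivative gains

def pd_tracking_action(des_pos, des_vel, cur_pos, cur_vel):
    # Drive the system towards the desired position/velocity of the current trajectory step.
    return p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)
```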
@@ -176,26 +174,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
        """Only set render options here, such that they can be used during the rollout.
        This only needs to be called once"""
        self.render_kwargs = kwargs
        # self.env.render(mode=self.render_mode, **self.render_kwargs)
        # self.env.render(**self.render_kwargs)

    def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
        self.current_traj_steps = 0
        return super(BlackBoxWrapper, self).reset()

    def plot_trajs(self, des_trajs, des_vels):
        import matplotlib.pyplot as plt
        import matplotlib
        matplotlib.use('TkAgg')
        pos_fig = plt.figure('positions')
        vel_fig = plt.figure('velocities')
        for i in range(des_trajs.shape[1]):
            plt.figure(pos_fig.number)
            plt.subplot(des_trajs.shape[1], 1, i + 1)
            plt.plot(np.ones(des_trajs.shape[0]) * self.current_pos[i])
            plt.plot(des_trajs[:, i])

            plt.figure(vel_fig.number)
            plt.subplot(des_vels.shape[1], 1, i + 1)
            plt.plot(np.ones(des_trajs.shape[0]) * self.current_vel[i])
            plt.plot(des_vels[:, i])
@@ -1,6 +1,6 @@
import numpy as np

from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController


class MetaWorldController(BaseController):
@@ -1,6 +1,6 @@
from typing import Union, Tuple

from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController


class PDController(BaseController):
@@ -1,4 +1,4 @@
from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController


class PosController(BaseController):
@@ -1,4 +1,4 @@
from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController


class VelController(BaseController):
@@ -1,7 +1,7 @@
from alr_envs.black_box.controller.meta_world_controller import MetaWorldController
from alr_envs.black_box.controller.pd_controller import PDController
from alr_envs.black_box.controller.vel_controller import VelController
from alr_envs.black_box.controller.pos_controller import PosController
from fancy_gym.black_box.controller.meta_world_controller import MetaWorldController
from fancy_gym.black_box.controller.pd_controller import PDController
from fancy_gym.black_box.controller.pos_controller import PosController
from fancy_gym.black_box.controller.vel_controller import VelController

ALL_TYPES = ["motor", "velocity", "position", "metaworld"]
@@ -1,8 +1,7 @@
from mp_pytorch.mp.dmp import DMP
from mp_pytorch.mp.promp import ProMP
from mp_pytorch.mp.idmp import IDMP

from mp_pytorch.basis_gn.basis_generator import BasisGenerator
from mp_pytorch.mp.dmp import DMP
from mp_pytorch.mp.idmp import IDMP
from mp_pytorch.mp.promp import ProMP

ALL_TYPES = ["promp", "dmp", "idmp"]
@@ -1,9 +1,9 @@
from typing import Union, Tuple
from mp_pytorch.mp.mp_interfaces import MPInterface
from abc import abstractmethod
from typing import Union, Tuple

import gym
import numpy as np
from mp_pytorch.mp.mp_interfaces import MPInterface


class RawInterfaceWrapper(gym.Wrapper):
@@ -55,7 +55,7 @@ class RawInterfaceWrapper(gym.Wrapper):
    def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
            np.ndarray, Union[np.ndarray, None]]:
        """
        Used to extract the parameters for the motion primitive and other parameters from an action array which might
        Used to extract the parameters for the movement primitive and other parameters from an action array which might
        include other actions like ball releasing time for the beer pong environment.
        This only needs to be overwritten if the action space is modified.
        Args:
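The docstring above covers the one case where `episode_callback` has to be overridden: an action vector that carries extra, non-primitive entries such as a ball release time. A hedged sketch of such an override; the split position and the meaning of the second return value are illustrative assumptions, not the actual beer-pong implementation:

```python
from typing import Tuple, Union

import numpy as np
from mp_pytorch.mp.mp_interfaces import MPInterface

from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper


class BeerPongLikeWrapper(RawInterfaceWrapper):  # hypothetical name
    # (other required wrapper properties are omitted in this sketch)

    def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
            np.ndarray, Union[np.ndarray, None]]:
        # Assumption: the last action entry is a task-specific parameter (e.g. release time)
        # and everything before it parametrizes the movement primitive.
        mp_params, extra = action[:-1], action[-1:]
        return mp_params, extra
```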
@ -56,7 +56,7 @@ kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1
|
||||
register(
|
||||
id=f'dmc_ball_in_cup-catch_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bic_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
|
||||
@ -66,7 +66,7 @@ kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
|
||||
kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
|
||||
register(
|
||||
id=f'dmc_ball_in_cup-catch_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bic_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
|
||||
@ -80,7 +80,7 @@ kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
register(
|
||||
id=f'dmc_reacher-easy_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bic_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
|
||||
@ -91,7 +91,7 @@ kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
register(
|
||||
id=f'dmc_reacher-easy_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_easy_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
|
||||
@ -105,7 +105,7 @@ kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
|
||||
register(
|
||||
id=f'dmc_reacher-hard_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_hard_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
|
||||
@ -116,7 +116,7 @@ kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
register(
|
||||
id=f'dmc_reacher-hard_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_hard_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
|
||||
@ -136,7 +136,7 @@ for _task in _dmc_cartpole_tasks:
|
||||
kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
@ -150,7 +150,7 @@ for _task in _dmc_cartpole_tasks:
|
||||
kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -167,7 +167,7 @@ kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
|
||||
_env_id = f'dmc_cartpole-two_poles_dmp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole2poles_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
@ -181,7 +181,7 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
|
||||
_env_id = f'dmc_cartpole-two_poles_promp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole2poles_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -198,7 +198,7 @@ kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
|
||||
_env_id = f'dmc_cartpole-three_poles_dmp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole3poles_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
@ -212,7 +212,7 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
|
||||
_env_id = f'dmc_cartpole-three_poles_promp-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_cartpole3poles_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -227,7 +227,7 @@ kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_
|
||||
kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
|
||||
register(
|
||||
id=f'dmc_manipulation-reach_site_dmp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_mani_reach_site_features_dmp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
|
||||
@ -239,7 +239,7 @@ kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weigh
|
||||
kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
|
||||
register(
|
||||
id=f'dmc_manipulation-reach_site_promp-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_mani_reach_site_features_promp
|
||||
)
|
||||
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")
|
@ -5,9 +5,9 @@ import collections
|
||||
from collections.abc import MutableMapping
|
||||
from typing import Any, Dict, Tuple, Optional, Union, Callable
|
||||
|
||||
from dm_control import composer
|
||||
import gym
|
||||
import numpy as np
|
||||
from dm_control import composer
|
||||
from dm_control.rl import control
|
||||
from dm_env import specs
|
||||
from gym import spaces
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -1,3 +1,3 @@
|
||||
from .mp_wrapper import MPWrapper
|
||||
from .mp_wrapper import TwoPolesMPWrapper
|
||||
from .mp_wrapper import ThreePolesMPWrapper
|
||||
from .mp_wrapper import TwoPolesMPWrapper
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -8,13 +8,13 @@ from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
|
||||
from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
|
||||
from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
|
||||
from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
|
||||
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
|
||||
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
|
||||
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
|
||||
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
|
||||
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
|
||||
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
|
||||
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
|
||||
from .mujoco.reacher.reacher import ReacherEnv
|
||||
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
|
||||
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
|
||||
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
|
||||
@ -64,7 +64,7 @@ DEFAULT_BB_DICT_DMP = {
|
||||
## Simple Reacher
|
||||
register(
|
||||
id='SimpleReacher-v0',
|
||||
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 2,
|
||||
@ -73,7 +73,7 @@ register(
|
||||
|
||||
register(
|
||||
id='LongSimpleReacher-v0',
|
||||
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 5,
|
||||
@ -84,7 +84,7 @@ register(
|
||||
|
||||
register(
|
||||
id='ViaPointReacher-v0',
|
||||
entry_point='alr_envs.envs.classic_control:ViaPointReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:ViaPointReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 5,
|
||||
@ -96,7 +96,7 @@ register(
|
||||
## Hole Reacher
|
||||
register(
|
||||
id='HoleReacher-v0',
|
||||
entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 5,
|
||||
@ -112,12 +112,12 @@ register(
|
||||
|
||||
# Mujoco
|
||||
|
||||
## Reacher
|
||||
## Mujoco Reacher
|
||||
for _dims in [5, 7]:
|
||||
register(
|
||||
id=f'Reacher{_dims}d-v0',
|
||||
entry_point='alr_envs.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=200,
|
||||
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_REACHER,
|
||||
kwargs={
|
||||
"n_links": _dims,
|
||||
}
|
||||
@ -125,17 +125,18 @@ for _dims in [5, 7]:
|
||||
|
||||
register(
|
||||
id=f'Reacher{_dims}dSparse-v0',
|
||||
entry_point='alr_envs.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=200,
|
||||
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_REACHER,
|
||||
kwargs={
|
||||
"sparse": True,
|
||||
'reward_weight': 200,
|
||||
"n_links": _dims,
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='HopperJumpSparse-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||
kwargs={
|
||||
"sparse": True,
|
||||
@ -144,7 +145,7 @@ register(
|
||||
|
||||
register(
|
||||
id='HopperJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||
kwargs={
|
||||
"sparse": False,
|
||||
@ -156,43 +157,43 @@ register(
|
||||
|
||||
register(
|
||||
id='AntJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:AntJumpEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HalfCheetahJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
|
||||
)
|
||||
|
||||
register(
|
||||
id='HopperJumpOnBox-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ALRHopperThrow-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperThrowEnv',
|
||||
id='HopperThrow-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ALRHopperThrowInBasket-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv',
|
||||
id='HopperThrowInBasket-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
|
||||
)
|
||||
|
||||
register(
|
||||
id='ALRWalker2DJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv',
|
||||
id='Walker2DJump-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
|
||||
)
|
||||
|
||||
register(
|
||||
id='BeerPong-v0',
|
||||
entry_point='alr_envs.envs.mujoco:BeerPongEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:BeerPongEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
|
||||
)
|
||||
|
||||
@ -200,18 +201,18 @@ register(
|
||||
# only one time step, i.e. we simulate until the end of th episode
|
||||
register(
|
||||
id='BeerPongStepBased-v0',
|
||||
entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
|
||||
entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
|
||||
)
|
||||
|
||||
# Beerpong with episodic reward, but fixed release time step
|
||||
register(
|
||||
id='BeerPongFixedRelease-v0',
|
||||
entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
|
||||
entry_point='fancy_gym.envs.mujoco:BeerPongEnvFixedReleaseStep',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
|
||||
)
|
||||
|
||||
# Motion Primitive Environments
|
||||
# movement Primitive Environments
|
||||
|
||||
## Simple Reacher
|
||||
_versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"]
|
||||
@ -227,7 +228,7 @@ for _v in _versions:
|
||||
kwargs_dict_simple_reacher_dmp['name'] = f"{_v}"
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_simple_reacher_dmp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
@ -240,7 +241,7 @@ for _v in _versions:
|
||||
kwargs_dict_simple_reacher_promp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_simple_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -254,7 +255,7 @@ kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0"
|
||||
register(
|
||||
id='ViaPointReacherDMP-v0',
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs=kwargs_dict_via_point_reacher_dmp
|
||||
)
|
||||
@ -266,7 +267,7 @@ kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'v
|
||||
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacher-v0"
|
||||
register(
|
||||
id="ViaPointReacherProMP-v0",
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_via_point_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
|
||||
@ -285,7 +286,7 @@ for _v in _versions:
|
||||
kwargs_dict_hole_reacher_dmp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs=kwargs_dict_hole_reacher_dmp
|
||||
)
|
||||
@ -299,7 +300,7 @@ for _v in _versions:
|
||||
kwargs_dict_hole_reacher_promp['name'] = f"{_v}"
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_hole_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -309,30 +310,26 @@ _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSpa
|
||||
for _v in _versions:
|
||||
_name = _v.split("-")
|
||||
_env_id = f'{_name[0]}DMP-{_name[1]}'
|
||||
kwargs_dict_reacherNd_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_reacherNd_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
|
||||
kwargs_dict_reacherNd_dmp['trajectory_generator_kwargs']['weight_scale'] = 5
|
||||
kwargs_dict_reacherNd_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_reacherNd_dmp['basis_generator_kwargs']['num_basis'] = 2
|
||||
kwargs_dict_reacherNd_dmp['name'] = _v
|
||||
kwargs_dict_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
|
||||
kwargs_dict_reacher_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
|
||||
kwargs_dict_reacher_dmp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# max_episode_steps=1,
|
||||
kwargs=kwargs_dict_reacherNd_dmp
|
||||
kwargs=kwargs_dict_reacher_dmp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
|
||||
|
||||
_env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
|
||||
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
|
||||
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
|
||||
kwargs_dict_alr_reacher_promp['name'] = _v
|
||||
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
|
||||
kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
|
||||
kwargs_dict_reacher_promp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_alr_reacher_promp
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_reacher_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
|
||||
@ -352,7 +349,7 @@ for _v in _versions:
|
||||
kwargs_dict_bp_promp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bp_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -372,7 +369,7 @@ for _v in _versions:
|
||||
kwargs_dict_bp_promp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_bp_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -384,7 +381,7 @@ for _v in _versions:
|
||||
# ########################################################################################################################
|
||||
#
|
||||
# ## AntJump
|
||||
# _versions = ['ALRAntJump-v0']
|
||||
# _versions = ['AntJump-v0']
|
||||
# for _v in _versions:
|
||||
# _name = _v.split("-")
|
||||
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
@ -393,7 +390,7 @@ for _v in _versions:
|
||||
# kwargs_dict_ant_jump_promp['name'] = _v
|
||||
# register(
|
||||
# id=_env_id,
|
||||
# entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# kwargs=kwargs_dict_ant_jump_promp
|
||||
# )
|
||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -401,7 +398,7 @@ for _v in _versions:
|
||||
# ########################################################################################################################
|
||||
#
|
||||
# ## HalfCheetahJump
|
||||
# _versions = ['ALRHalfCheetahJump-v0']
|
||||
# _versions = ['HalfCheetahJump-v0']
|
||||
# for _v in _versions:
|
||||
# _name = _v.split("-")
|
||||
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
@ -410,7 +407,7 @@ for _v in _versions:
|
||||
# kwargs_dict_halfcheetah_jump_promp['name'] = _v
|
||||
# register(
|
||||
# id=_env_id,
|
||||
# entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# kwargs=kwargs_dict_halfcheetah_jump_promp
|
||||
# )
|
||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -420,7 +417,7 @@ for _v in _versions:
|
||||
|
||||
## HopperJump
|
||||
_versions = ['HopperJump-v0', 'HopperJumpSparse-v0',
|
||||
# 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0'
|
||||
# 'HopperJumpOnBox-v0', 'HopperThrow-v0', 'HopperThrowInBasket-v0'
|
||||
]
|
||||
# TODO: Check if all environments work with the same MPWrapper
|
||||
for _v in _versions:
|
||||
@ -431,7 +428,7 @@ for _v in _versions:
|
||||
kwargs_dict_hopper_jump_promp['name'] = _v
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
kwargs=kwargs_dict_hopper_jump_promp
|
||||
)
|
||||
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -440,7 +437,7 @@ for _v in _versions:
|
||||
#
|
||||
#
|
||||
# ## Walker2DJump
|
||||
# _versions = ['ALRWalker2DJump-v0']
|
||||
# _versions = ['Walker2DJump-v0']
|
||||
# for _v in _versions:
|
||||
# _name = _v.split("-")
|
||||
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
|
||||
@ -449,7 +446,7 @@ for _v in _versions:
|
||||
# kwargs_dict_walker2d_jump_promp['name'] = _v
|
||||
# register(
|
||||
# id=_env_id,
|
||||
# entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
|
||||
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
|
||||
# kwargs=kwargs_dict_walker2d_jump_promp
|
||||
# )
|
||||
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
|
||||
@ -458,7 +455,7 @@ for _v in _versions:
|
||||
"""
|
||||
register(
|
||||
id='SimpleReacher-v1',
|
||||
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 2,
|
||||
@ -468,7 +465,7 @@ register(
|
||||
|
||||
register(
|
||||
id='LongSimpleReacher-v1',
|
||||
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 5,
|
||||
@ -477,7 +474,7 @@ register(
|
||||
)
|
||||
register(
|
||||
id='HoleReacher-v1',
|
||||
entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 5,
|
||||
@ -492,7 +489,7 @@ register(
|
||||
)
|
||||
register(
|
||||
id='HoleReacher-v2',
|
||||
entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
|
||||
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"n_links": 5,
|
||||
@ -508,8 +505,8 @@ register(
|
||||
|
||||
# CtxtFree are v0, Contextual are v1
|
||||
register(
|
||||
id='ALRAntJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:AntJumpEnv',
|
||||
id='AntJump-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
|
||||
@ -518,8 +515,8 @@ register(
|
||||
)
|
||||
# CtxtFree are v0, Contextual are v1
|
||||
register(
|
||||
id='ALRHalfCheetahJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv',
|
||||
id='HalfCheetahJump-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
|
||||
@ -527,8 +524,8 @@ register(
|
||||
}
|
||||
)
|
||||
register(
|
||||
id='ALRHopperJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
|
||||
id='HopperJump-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
|
||||
@ -546,26 +543,26 @@ for i in _vs:
|
||||
_env_id = f'ALRReacher{i}-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.envs.mujoco:ReacherEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"steps_before_reward": 0,
|
||||
"n_links": 5,
|
||||
"balance": False,
|
||||
'ctrl_cost_weight': i
|
||||
'_ctrl_cost_weight': i
|
||||
}
|
||||
)
|
||||
|
||||
_env_id = f'ALRReacherSparse{i}-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.envs.mujoco:ReacherEnv',
|
||||
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
|
||||
max_episode_steps=200,
|
||||
kwargs={
|
||||
"steps_before_reward": 200,
|
||||
"n_links": 5,
|
||||
"balance": False,
|
||||
'ctrl_cost_weight': i
|
||||
'_ctrl_cost_weight': i
|
||||
}
|
||||
)
|
||||
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
|
||||
@ -573,7 +570,7 @@ for i in _vs:
|
||||
_env_id = f'ALRReacher{i}ProMP-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"{_env_id.replace('ProMP', '')}",
|
||||
"wrappers": [mujoco.reacher.MPWrapper],
|
||||
@ -596,7 +593,7 @@ for i in _vs:
|
||||
_env_id = f'ALRReacherSparse{i}ProMP-v0'
|
||||
register(
|
||||
id=_env_id,
|
||||
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
|
||||
entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
|
||||
kwargs={
|
||||
"name": f"{_env_id.replace('ProMP', '')}",
|
||||
"wrappers": [mujoco.reacher.MPWrapper],
|
||||
@ -617,8 +614,8 @@ for i in _vs:
|
||||
)
|
||||
|
||||
register(
|
||||
id='ALRHopperJumpOnBox-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
|
||||
id='HopperJumpOnBox-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
|
||||
@ -626,8 +623,8 @@ for i in _vs:
|
||||
}
|
||||
)
|
||||
register(
|
||||
id='ALRHopperThrow-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperThrowEnv',
|
||||
id='HopperThrow-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
|
||||
@ -635,8 +632,8 @@ for i in _vs:
|
||||
}
|
||||
)
|
||||
register(
|
||||
id='ALRHopperThrowInBasket-v0',
|
||||
entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv',
|
||||
id='HopperThrowInBasket-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
|
||||
@ -644,8 +641,8 @@ for i in _vs:
|
||||
}
|
||||
)
|
||||
register(
|
||||
id='ALRWalker2DJump-v0',
|
||||
entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv',
|
||||
id='Walker2DJump-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
|
||||
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
|
||||
kwargs={
|
||||
"max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
|
||||
@ -653,13 +650,13 @@ for i in _vs:
|
||||
}
|
||||
)
|
||||
register(id='TableTennis2DCtxt-v1',
|
||||
entry_point='alr_envs.envs.mujoco:TTEnvGym',
|
||||
entry_point='fancy_gym.envs.mujoco:TTEnvGym',
|
||||
max_episode_steps=MAX_EPISODE_STEPS,
|
||||
kwargs={'ctxt_dim': 2, 'fixed_goal': True})
|
||||
|
||||
register(
|
||||
id='ALRBeerPong-v0',
|
||||
entry_point='alr_envs.envs.mujoco:ALRBeerBongEnv',
|
||||
id='BeerPong-v0',
|
||||
entry_point='fancy_gym.envs.mujoco:BeerBongEnv',
|
||||
max_episode_steps=300,
|
||||
kwargs={
|
||||
"rndm_goal": False,
|
@ -7,7 +7,7 @@ from gym import spaces
|
||||
from gym.core import ObsType
|
||||
from gym.utils import seeding
|
||||
|
||||
from alr_envs.envs.classic_control.utils import intersect
|
||||
from fancy_gym.envs.classic_control.utils import intersect
|
||||
|
||||
|
||||
class BaseReacherEnv(gym.Env, ABC):
|
@ -1,14 +1,16 @@
|
||||
from abc import ABC
|
||||
|
||||
from gym import spaces
|
||||
import numpy as np
|
||||
from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
|
||||
from gym import spaces
|
||||
|
||||
from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
|
||||
|
||||
|
||||
class BaseReacherDirectEnv(BaseReacherEnv, ABC):
|
||||
"""
|
||||
Base class for directly controlled reaching environments
|
||||
"""
|
||||
|
||||
def __init__(self, n_links: int, random_start: bool = True,
|
||||
allow_self_collision: bool = False):
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
@ -1,14 +1,16 @@
|
||||
from abc import ABC
|
||||
|
||||
from gym import spaces
|
||||
import numpy as np
|
||||
from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
|
||||
from gym import spaces
|
||||
|
||||
from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
|
||||
|
||||
|
||||
class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
|
||||
"""
|
||||
Base class for torque controlled reaching environments
|
||||
"""
|
||||
|
||||
def __init__(self, n_links: int, random_start: bool = True,
|
||||
allow_self_collision: bool = False):
|
||||
super().__init__(n_links, random_start, allow_self_collision)
|
@ -6,7 +6,9 @@ import numpy as np
|
||||
from gym.core import ObsType
|
||||
from matplotlib import patches
|
||||
|
||||
from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
||||
from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
||||
|
||||
MAX_EPISODE_STEPS_HOLEREACHER = 200
|
||||
|
||||
|
||||
class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
@ -41,13 +43,13 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
|
||||
|
||||
if rew_fct == "simple":
|
||||
from alr_envs.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
|
||||
from fancy_gym.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
|
||||
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
|
||||
elif rew_fct == "vel_acc":
|
||||
from alr_envs.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
|
||||
from fancy_gym.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
|
||||
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
|
||||
elif rew_fct == "unbounded":
|
||||
from alr_envs.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
|
||||
from fancy_gym.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
|
||||
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
|
||||
else:
|
||||
raise ValueError("Unknown reward function {}".format(rew_fct))
|
||||
@ -224,7 +226,6 @@ class HoleReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import time
|
||||
|
||||
env = HoleReacherEnv(5)
|
||||
env.reset()
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -5,7 +5,7 @@ import numpy as np
|
||||
from gym import spaces
|
||||
from gym.core import ObsType
|
||||
|
||||
from alr_envs.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
|
||||
from fancy_gym.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
|
||||
|
||||
|
||||
class SimpleReacherEnv(BaseReacherTorqueEnv):
|
@ -1,6 +1,3 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def ccw(A, B, C):
|
||||
return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12
|
||||
|
@ -2,7 +2,7 @@ from typing import Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
@ -4,9 +4,8 @@ import gym
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from gym.core import ObsType
|
||||
from gym.utils import seeding
|
||||
|
||||
from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
||||
from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
|
||||
|
||||
|
||||
class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
@ -187,7 +186,6 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import time
|
||||
|
||||
env = ViaPointReacherEnv(5)
|
||||
env.reset()
|
@ -1,9 +1,9 @@
|
||||
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
|
||||
from .ant_jump.ant_jump import AntJumpEnv
|
||||
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
|
||||
from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv
|
||||
from .hopper_jump.hopper_jump import HopperJumpEnv
|
||||
from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv
|
||||
from .hopper_throw.hopper_throw import HopperThrowEnv
|
||||
from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
|
||||
from .reacher.reacher import ReacherEnv
|
||||
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
|
||||
from .hopper_jump.hopper_jump import HopperJumpEnv
|
@ -17,7 +17,7 @@ class AntJumpEnv(AntEnv):
|
||||
"""
|
||||
Initialization changes to normal Ant:
|
||||
- healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
|
||||
- ctrl_cost_weight 0.5 -> 0.0
|
||||
- _ctrl_cost_weight 0.5 -> 0.0
|
||||
- contact_cost_weight: 5e-4 -> 0.0
|
||||
- healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')) !!!!! Does that make sense, limiting height?
|
||||
"""
|
@ -2,7 +2,7 @@ from typing import Union, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
|
||||
|
||||
|
||||
class MPWrapper(RawInterfaceWrapper):
|
Some files were not shown because too many files have changed in this diff.