renaming to fancy_gym

Fabian 2022-07-13 15:10:43 +02:00
parent d412fb229c
commit 8d1c1b44bf
179 changed files with 332 additions and 361 deletions

View File

@ -1,13 +1,12 @@
## ALR Robotics Control Environments
## Fancy Gym
This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
We provide support (under the OpenAI interface) for the benchmark suites
Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
(DMC) and [Metaworld](https://meta-world.github.io/).
Custom (Mujoco) gym environments can be created according
to [this guide](https://www.gymlibrary.ml/content/environment_creation/).
Unlike existing libraries, we additionally support to control agents with movement primitives, such as
Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP, we only consider the mean usually).
(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
to [this guide](https://www.gymlibrary.ml/content/environment_creation/). Unlike existing libraries, we additionally
support controlling agents with movement primitives, such as Dynamic Movement Primitives (DMPs) and Probabilistic
Movement Primitives (ProMPs; we usually only consider the mean).
## Movement Primitive Environments (Episode-Based/Black-Box Environments)
@ -59,14 +58,14 @@ pip install -e .
## Using the framework
We prepared [multiple examples](alr_envs/examples/), please have a look there for more specific examples.
We prepared [multiple examples](fancy_gym/examples/), please have a look there for more specific examples.
### Step-wise environments
```python
import alr_envs
import fancy_gym
env = alr_envs.make('HoleReacher-v0', seed=1)
env = fancy_gym.make('HoleReacher-v0', seed=1)
state = env.reset()
for i in range(1000):
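    # (sketch) the diff cuts the loop body here; the usual remainder,
    # assuming the standard 4-tuple gym API used throughout this repo:
    action = env.action_space.sample()  # replace with your policy
    state, reward, done, info = env.step(action)
    env.render()
    if done:
        state = env.reset()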
@ -85,9 +84,9 @@ Existing MP tasks can be created the same way as above. Just keep in mind, calli
trajectory.
```python
import alr_envs
import fancy_gym
env = alr_envs.make('HoleReacherProMP-v0', seed=1)
env = fancy_gym.make('HoleReacherProMP-v0', seed=1)
# render() can be called once in the beginning with all necessary arguments. To turn it off again, just call render(None).
env.render()
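# (sketch) the rollout that follows in the full example; with MP environments
# a single env.step() executes the entire trajectory generated from the action
obs = env.reset()
for i in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()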
@ -104,19 +103,19 @@ To show all available environments, we provide some additional convenience. Each
keys `DMP` and `ProMP` that store a list of available environment names.
```python
import alr_envs
import fancy_gym
print("Custom MP tasks:")
print(alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("OpenAI Gym MP tasks:")
print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("Deepmind Control MP tasks:")
print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print("MetaWorld MP tasks:")
print(alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
print(fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
```
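The stored names are plain gym ids, so they can be fed straight back into `fancy_gym.make`. A minimal sketch, assuming the combined `ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS` dict assembled in `fancy_gym/__init__.py` (shown further down in this diff):

```python
import fancy_gym

# instantiate every registered ProMP task once as a quick smoke test
for env_id in fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"]:
    env = fancy_gym.make(env_id, seed=0)
    print(env_id, env.action_space.shape)
```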
### How to create a new MP task
@ -181,12 +180,12 @@ class MPWrapper(MPEnvWrapper):
```
If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well.
Without the integration the task can still be used. A rough outline can be shown here, for more details we recommend
having a look at the [examples](alr_envs/examples/).
If you created a new task wrapper, feel free to open a PR so we can integrate it for others to use as well. Even without
the integration the task can still be used. A rough outline is shown below; for more details, we recommend having a look
at the [examples](fancy_gym/examples/).
```python
import alr_envs
import fancy_gym
# Base environment name, according to structure of above example
base_env_id = "ball_in_cup-catch"
@ -194,12 +193,12 @@ base_env_id = "ball_in_cup-catch"
# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
# You can also add other gym.Wrappers in case they are needed,
# e.g. gym.wrappers.FlattenObservation for dict observations
wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
wrappers = [fancy_gym.dmc.suite.ball_in_cup.MPWrapper]
mp_kwargs = {...}
kwargs = {...}
env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
env = fancy_gym.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
# OR for a deterministic ProMP (other traj_gen_kwargs are required):
# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
# env = fancy_gym.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
rewards = 0
obs = env.reset()
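# (sketch) how the outline continues, mirroring the examples:
for i in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    rewards += reward
    if done:
        print("episode return:", rewards)
        rewards, obs = 0, env.reset()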

View File

@ -1,14 +1,13 @@
from alr_envs import dmc, meta, open_ai
from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
from fancy_gym import dmc, meta, open_ai
from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
# Convenience function for all MP environments
from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
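The merged dict keeps the `DMP`/`ProMP` keys and concatenates the name lists of all collections. A quick illustrative check (the exact ids depend on the registrations later in this diff):

```python
import fancy_gym

# both keys hold flat lists of gym ids accepted by fancy_gym.make()
print(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.keys())  # dict_keys(['DMP', 'ProMP'])
print(len(fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"]))
```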

View File

@ -1,4 +1,5 @@
import os
os.environ["MUJOCO_GL"] = "egl"
from typing import Tuple, Optional
@ -8,9 +9,9 @@ import numpy as np
from gym import spaces
from mp_pytorch.mp.mp_interfaces import MPInterface
from alr_envs.black_box.controller.base_controller import BaseController
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from alr_envs.utils.utils import get_numpy
from fancy_gym.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.utils.utils import get_numpy
class BlackBoxWrapper(gym.ObservationWrapper):
@ -77,12 +78,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
def get_trajectory(self, action: np.ndarray) -> Tuple:
clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
self.traj_gen.set_params(clipped_params)
# TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
# TODO Check with Bruce for replanning
# TODO: is this correct for replanning? Do we need to adjust anything here?
self.traj_gen.set_boundary_conditions(
bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
bc_pos=self.current_pos, bc_vel=self.current_vel)
# TODO: is this correct for replanning? Do we need to adjust anything here?
self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]),
np.array([self.dt]))
traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
@ -99,9 +98,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
def _get_action_space(self):
"""
This function can be used to modify the action space for considering actions which are not learned via motion
This function can be used to modify the action space for considering actions which are not learned via movement
primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
motion primitive.
movement primitive.
Only needs to be overwritten if the action space needs to be modified.
"""
try:
@ -138,7 +137,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
for t, (pos, vel) in enumerate(zip(trajectory, velocity)):
step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
# print('step/clipped action ratio: ', step_action/c_action)
obs, c_reward, done, info = self.env.step(c_action)
rewards[t] = c_reward
@ -176,26 +174,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
"""Only set render options here, such that they can be used during the rollout.
This only needs to be called once"""
self.render_kwargs = kwargs
# self.env.render(mode=self.render_mode, **self.render_kwargs)
# self.env.render(**self.render_kwargs)
def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
self.current_traj_steps = 0
return super(BlackBoxWrapper, self).reset()
def plot_trajs(self, des_trajs, des_vels):
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('TkAgg')
pos_fig = plt.figure('positions')
vel_fig = plt.figure('velocities')
for i in range(des_trajs.shape[1]):
plt.figure(pos_fig.number)
plt.subplot(des_trajs.shape[1], 1, i + 1)
plt.plot(np.ones(des_trajs.shape[0]) * self.current_pos[i])
plt.plot(des_trajs[:, i])
plt.figure(vel_fig.number)
plt.subplot(des_vels.shape[1], 1, i + 1)
plt.plot(np.ones(des_trajs.shape[0]) * self.current_vel[i])
plt.plot(des_vels[:, i])

View File

@ -1,6 +1,6 @@
import numpy as np
from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController
class MetaWorldController(BaseController):

View File

@ -1,6 +1,6 @@
from typing import Union, Tuple
from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController
class PDController(BaseController):

View File

@ -1,4 +1,4 @@
from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController
class PosController(BaseController):

View File

@ -1,4 +1,4 @@
from alr_envs.black_box.controller.base_controller import BaseController
from fancy_gym.black_box.controller.base_controller import BaseController
class VelController(BaseController):

View File

@ -1,7 +1,7 @@
from alr_envs.black_box.controller.meta_world_controller import MetaWorldController
from alr_envs.black_box.controller.pd_controller import PDController
from alr_envs.black_box.controller.vel_controller import VelController
from alr_envs.black_box.controller.pos_controller import PosController
from fancy_gym.black_box.controller.meta_world_controller import MetaWorldController
from fancy_gym.black_box.controller.pd_controller import PDController
from fancy_gym.black_box.controller.pos_controller import PosController
from fancy_gym.black_box.controller.vel_controller import VelController
ALL_TYPES = ["motor", "velocity", "position", "metaworld"]

View File

@ -1,8 +1,7 @@
from mp_pytorch.mp.dmp import DMP
from mp_pytorch.mp.promp import ProMP
from mp_pytorch.mp.idmp import IDMP
from mp_pytorch.basis_gn.basis_generator import BasisGenerator
from mp_pytorch.mp.dmp import DMP
from mp_pytorch.mp.idmp import IDMP
from mp_pytorch.mp.promp import ProMP
ALL_TYPES = ["promp", "dmp", "idmp"]

View File

@ -1,9 +1,9 @@
from typing import Union, Tuple
from mp_pytorch.mp.mp_interfaces import MPInterface
from abc import abstractmethod
from typing import Union, Tuple
import gym
import numpy as np
from mp_pytorch.mp.mp_interfaces import MPInterface
class RawInterfaceWrapper(gym.Wrapper):
@ -55,7 +55,7 @@ class RawInterfaceWrapper(gym.Wrapper):
def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
np.ndarray, Union[np.ndarray, None]]:
"""
Used to extract the parameters for the motion primitive and other parameters from an action array which might
Used to extract the parameters for the movement primitive and other parameters from an action array which might
include other actions like ball releasing time for the beer pong environment.
This only needs to be overwritten if the action space is modified.
Args:

View File

@ -56,7 +56,7 @@ kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10 # TODO: weight scale 1, but goal scale 0.1
register(
id=f'dmc_ball_in_cup-catch_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bic_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
@ -66,7 +66,7 @@ kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
register(
id=f'dmc_ball_in_cup-catch_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bic_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
@ -80,7 +80,7 @@ kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
register(
id=f'dmc_reacher-easy_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bic_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
@ -91,7 +91,7 @@ kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=f'dmc_reacher-easy_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_easy_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
@ -105,7 +105,7 @@ kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
register(
id=f'dmc_reacher-hard_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_hard_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
@ -116,7 +116,7 @@ kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=f'dmc_reacher-hard_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_hard_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
@ -136,7 +136,7 @@ for _task in _dmc_cartpole_tasks:
kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@ -150,7 +150,7 @@ for _task in _dmc_cartpole_tasks:
kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -167,7 +167,7 @@ kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-two_poles_dmp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole2poles_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@ -181,7 +181,7 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
_env_id = f'dmc_cartpole-two_poles_promp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole2poles_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -198,7 +198,7 @@ kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
_env_id = f'dmc_cartpole-three_poles_dmp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole3poles_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@ -212,7 +212,7 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
_env_id = f'dmc_cartpole-three_poles_promp-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_cartpole3poles_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -227,7 +227,7 @@ kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_
kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
register(
id=f'dmc_manipulation-reach_site_dmp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_mani_reach_site_features_dmp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
@ -239,7 +239,7 @@ kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weigh
kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
register(
id=f'dmc_manipulation-reach_site_promp-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_mani_reach_site_features_promp
)
ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")

View File

@ -5,9 +5,9 @@ import collections
from collections.abc import MutableMapping
from typing import Any, Dict, Tuple, Optional, Union, Callable
from dm_control import composer
import gym
import numpy as np
from dm_control import composer
from dm_control.rl import control
from dm_env import specs
from gym import spaces

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -1,3 +1,3 @@
from .mp_wrapper import MPWrapper
from .mp_wrapper import TwoPolesMPWrapper
from .mp_wrapper import ThreePolesMPWrapper
from .mp_wrapper import TwoPolesMPWrapper

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -8,13 +8,13 @@ from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
from .mujoco.reacher.reacher import ReacherEnv
from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@ -64,7 +64,7 @@ DEFAULT_BB_DICT_DMP = {
## Simple Reacher
register(
id='SimpleReacher-v0',
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 2,
@ -73,7 +73,7 @@ register(
register(
id='LongSimpleReacher-v0',
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
@ -84,7 +84,7 @@ register(
register(
id='ViaPointReacher-v0',
entry_point='alr_envs.envs.classic_control:ViaPointReacherEnv',
entry_point='fancy_gym.envs.classic_control:ViaPointReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
@ -96,7 +96,7 @@ register(
## Hole Reacher
register(
id='HoleReacher-v0',
entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
@ -112,12 +112,12 @@ register(
# Mujoco
## Reacher
## Mujoco Reacher
for _dims in [5, 7]:
register(
id=f'Reacher{_dims}d-v0',
entry_point='alr_envs.envs.mujoco:ReacherEnv',
max_episode_steps=200,
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=MAX_EPISODE_STEPS_REACHER,
kwargs={
"n_links": _dims,
}
@ -125,17 +125,18 @@ for _dims in [5, 7]:
register(
id=f'Reacher{_dims}dSparse-v0',
entry_point='alr_envs.envs.mujoco:ReacherEnv',
max_episode_steps=200,
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=MAX_EPISODE_STEPS_REACHER,
kwargs={
"sparse": True,
'reward_weight': 200,
"n_links": _dims,
}
)
register(
id='HopperJumpSparse-v0',
entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"sparse": True,
@ -144,7 +145,7 @@ register(
register(
id='HopperJump-v0',
entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"sparse": False,
@ -156,43 +157,43 @@ register(
register(
id='AntJump-v0',
entry_point='alr_envs.envs.mujoco:AntJumpEnv',
entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
)
register(
id='HalfCheetahJump-v0',
entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv',
entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
)
register(
id='HopperJumpOnBox-v0',
entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
)
register(
id='ALRHopperThrow-v0',
entry_point='alr_envs.envs.mujoco:HopperThrowEnv',
id='HopperThrow-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
)
register(
id='ALRHopperThrowInBasket-v0',
entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv',
id='HopperThrowInBasket-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
)
register(
id='ALRWalker2DJump-v0',
entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv',
id='Walker2DJump-v0',
entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
)
register(
id='BeerPong-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnv',
entry_point='fancy_gym.envs.mujoco:BeerPongEnv',
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
@ -200,18 +201,18 @@ register(
# only one time step, i.e. we simulate until the end of the episode
register(
id='BeerPongStepBased-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
# Beerpong with episodic reward, but fixed release time step
register(
id='BeerPongFixedRelease-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
entry_point='fancy_gym.envs.mujoco:BeerPongEnvFixedReleaseStep',
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
# Motion Primitive Environments
# Movement Primitive Environments
## Simple Reacher
_versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"]
@ -227,7 +228,7 @@ for _v in _versions:
kwargs_dict_simple_reacher_dmp['name'] = f"{_v}"
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_dmp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@ -240,7 +241,7 @@ for _v in _versions:
kwargs_dict_simple_reacher_promp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_simple_reacher_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -254,7 +255,7 @@ kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0"
register(
id='ViaPointReacherDMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs=kwargs_dict_via_point_reacher_dmp
)
@ -266,7 +267,7 @@ kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'v
kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacher-v0"
register(
id="ViaPointReacherProMP-v0",
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_via_point_reacher_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
@ -285,7 +286,7 @@ for _v in _versions:
kwargs_dict_hole_reacher_dmp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs=kwargs_dict_hole_reacher_dmp
)
@ -299,7 +300,7 @@ for _v in _versions:
kwargs_dict_hole_reacher_promp['name'] = f"{_v}"
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hole_reacher_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -309,30 +310,26 @@ _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSpa
for _v in _versions:
_name = _v.split("-")
_env_id = f'{_name[0]}DMP-{_name[1]}'
kwargs_dict_reacherNd_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacherNd_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_reacherNd_dmp['trajectory_generator_kwargs']['weight_scale'] = 5
kwargs_dict_reacherNd_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacherNd_dmp['basis_generator_kwargs']['num_basis'] = 2
kwargs_dict_reacherNd_dmp['name'] = _v
kwargs_dict_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
kwargs_dict_reacher_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
kwargs_dict_reacher_dmp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# max_episode_steps=1,
kwargs=kwargs_dict_reacherNd_dmp
kwargs=kwargs_dict_reacher_dmp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
_env_id = f'{_name[0]}ProMP-{_name[1]}'
kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
kwargs_dict_alr_reacher_promp['name'] = _v
kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
kwargs_dict_reacher_promp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_alr_reacher_promp
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_reacher_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -352,7 +349,7 @@ for _v in _versions:
kwargs_dict_bp_promp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -372,7 +369,7 @@ for _v in _versions:
kwargs_dict_bp_promp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_bp_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -384,7 +381,7 @@ for _v in _versions:
# ########################################################################################################################
#
# ## AntJump
# _versions = ['ALRAntJump-v0']
# _versions = ['AntJump-v0']
# for _v in _versions:
# _name = _v.split("-")
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
@ -393,7 +390,7 @@ for _v in _versions:
# kwargs_dict_ant_jump_promp['name'] = _v
# register(
# id=_env_id,
# entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_ant_jump_promp
# )
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -401,7 +398,7 @@ for _v in _versions:
# ########################################################################################################################
#
# ## HalfCheetahJump
# _versions = ['ALRHalfCheetahJump-v0']
# _versions = ['HalfCheetahJump-v0']
# for _v in _versions:
# _name = _v.split("-")
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
@ -410,7 +407,7 @@ for _v in _versions:
# kwargs_dict_halfcheetah_jump_promp['name'] = _v
# register(
# id=_env_id,
# entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_halfcheetah_jump_promp
# )
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -420,7 +417,7 @@ for _v in _versions:
## HopperJump
_versions = ['HopperJump-v0', 'HopperJumpSparse-v0',
# 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0'
# 'HopperJumpOnBox-v0', 'HopperThrow-v0', 'HopperThrowInBasket-v0'
]
# TODO: Check if all environments work with the same MPWrapper
for _v in _versions:
@ -431,7 +428,7 @@ for _v in _versions:
kwargs_dict_hopper_jump_promp['name'] = _v
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
kwargs=kwargs_dict_hopper_jump_promp
)
ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -440,7 +437,7 @@ for _v in _versions:
#
#
# ## Walker2DJump
# _versions = ['ALRWalker2DJump-v0']
# _versions = ['Walker2DJump-v0']
# for _v in _versions:
# _name = _v.split("-")
# _env_id = f'{_name[0]}ProMP-{_name[1]}'
@ -449,7 +446,7 @@ for _v in _versions:
# kwargs_dict_walker2d_jump_promp['name'] = _v
# register(
# id=_env_id,
# entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
# entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
# kwargs=kwargs_dict_walker2d_jump_promp
# )
# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@ -458,7 +455,7 @@ for _v in _versions:
"""
register(
id='SimpleReacher-v1',
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 2,
@ -468,7 +465,7 @@ register(
register(
id='LongSimpleReacher-v1',
entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
@ -477,7 +474,7 @@ register(
)
register(
id='HoleReacher-v1',
entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
@ -492,7 +489,7 @@ register(
)
register(
id='HoleReacher-v2',
entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
max_episode_steps=200,
kwargs={
"n_links": 5,
@ -508,8 +505,8 @@ register(
# CtxtFree are v0, Contextual are v1
register(
id='ALRAntJump-v0',
entry_point='alr_envs.envs.mujoco:AntJumpEnv',
id='AntJump-v0',
entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
@ -518,8 +515,8 @@ register(
)
# CtxtFree are v0, Contextual are v1
register(
id='ALRHalfCheetahJump-v0',
entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv',
id='HalfCheetahJump-v0',
entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
@ -527,8 +524,8 @@ register(
}
)
register(
id='ALRHopperJump-v0',
entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
id='HopperJump-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
@ -546,26 +543,26 @@ for i in _vs:
_env_id = f'ALRReacher{i}-v0'
register(
id=_env_id,
entry_point='alr_envs.envs.mujoco:ReacherEnv',
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"steps_before_reward": 0,
"n_links": 5,
"balance": False,
'ctrl_cost_weight': i
'_ctrl_cost_weight': i
}
)
_env_id = f'ALRReacherSparse{i}-v0'
register(
id=_env_id,
entry_point='alr_envs.envs.mujoco:ReacherEnv',
entry_point='fancy_gym.envs.mujoco:ReacherEnv',
max_episode_steps=200,
kwargs={
"steps_before_reward": 200,
"n_links": 5,
"balance": False,
'ctrl_cost_weight': i
'_ctrl_cost_weight': i
}
)
_vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
@ -573,7 +570,7 @@ for i in _vs:
_env_id = f'ALRReacher{i}ProMP-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"{_env_id.replace('ProMP', '')}",
"wrappers": [mujoco.reacher.MPWrapper],
@ -596,7 +593,7 @@ for i in _vs:
_env_id = f'ALRReacherSparse{i}ProMP-v0'
register(
id=_env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
kwargs={
"name": f"{_env_id.replace('ProMP', '')}",
"wrappers": [mujoco.reacher.MPWrapper],
@ -617,8 +614,8 @@ for i in _vs:
)
register(
id='ALRHopperJumpOnBox-v0',
entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
id='HopperJumpOnBox-v0',
entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
@ -626,8 +623,8 @@ for i in _vs:
}
)
register(
id='ALRHopperThrow-v0',
entry_point='alr_envs.envs.mujoco:HopperThrowEnv',
id='HopperThrow-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
@ -635,8 +632,8 @@ for i in _vs:
}
)
register(
id='ALRHopperThrowInBasket-v0',
entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv',
id='HopperThrowInBasket-v0',
entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
@ -644,8 +641,8 @@ for i in _vs:
}
)
register(
id='ALRWalker2DJump-v0',
entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv',
id='Walker2DJump-v0',
entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
kwargs={
"max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
@ -653,13 +650,13 @@ for i in _vs:
}
)
register(id='TableTennis2DCtxt-v1',
entry_point='alr_envs.envs.mujoco:TTEnvGym',
entry_point='fancy_gym.envs.mujoco:TTEnvGym',
max_episode_steps=MAX_EPISODE_STEPS,
kwargs={'ctxt_dim': 2, 'fixed_goal': True})
register(
id='ALRBeerPong-v0',
entry_point='alr_envs.envs.mujoco:ALRBeerBongEnv',
id='BeerPong-v0',
entry_point='fancy_gym.envs.mujoco:BeerBongEnv',
max_episode_steps=300,
kwargs={
"rndm_goal": False,

View File

@ -7,7 +7,7 @@ from gym import spaces
from gym.core import ObsType
from gym.utils import seeding
from alr_envs.envs.classic_control.utils import intersect
from fancy_gym.envs.classic_control.utils import intersect
class BaseReacherEnv(gym.Env, ABC):

View File

@ -1,14 +1,16 @@
from abc import ABC
from gym import spaces
import numpy as np
from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
from gym import spaces
from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
class BaseReacherDirectEnv(BaseReacherEnv, ABC):
"""
Base class for directly controlled reaching environments
"""
def __init__(self, n_links: int, random_start: bool = True,
allow_self_collision: bool = False):
super().__init__(n_links, random_start, allow_self_collision)

View File

@ -1,14 +1,16 @@
from abc import ABC
from gym import spaces
import numpy as np
from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
from gym import spaces
from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
"""
Base class for torque controlled reaching environments
"""
def __init__(self, n_links: int, random_start: bool = True,
allow_self_collision: bool = False):
super().__init__(n_links, random_start, allow_self_collision)

View File

@ -6,7 +6,9 @@ import numpy as np
from gym.core import ObsType
from matplotlib import patches
from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
MAX_EPISODE_STEPS_HOLEREACHER = 200
class HoleReacherEnv(BaseReacherDirectEnv):
@ -41,13 +43,13 @@ class HoleReacherEnv(BaseReacherDirectEnv):
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
if rew_fct == "simple":
from alr_envs.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
from fancy_gym.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
elif rew_fct == "vel_acc":
from alr_envs.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
from fancy_gym.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
elif rew_fct == "unbounded":
from alr_envs.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
from fancy_gym.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
else:
raise ValueError("Unknown reward function {}".format(rew_fct))
@ -224,7 +226,6 @@ class HoleReacherEnv(BaseReacherDirectEnv):
if __name__ == "__main__":
import time
env = HoleReacherEnv(5)
env.reset()

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -5,7 +5,7 @@ import numpy as np
from gym import spaces
from gym.core import ObsType
from alr_envs.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
from fancy_gym.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
class SimpleReacherEnv(BaseReacherTorqueEnv):

View File

@ -1,6 +1,3 @@
import numpy as np
def ccw(A, B, C):
return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12
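`intersect`, which other files in this diff import from this module, presumably builds on `ccw` in the classic way; a sketch under that assumption:

```python
def intersect(A, B, C, D):
    # segments AB and CD properly intersect iff C and D lie on opposite
    # sides of line AB, and A and B lie on opposite sides of line CD
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
```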

View File

@ -2,7 +2,7 @@ from typing import Tuple, Union
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

View File

@ -4,9 +4,8 @@ import gym
import matplotlib.pyplot as plt
import numpy as np
from gym.core import ObsType
from gym.utils import seeding
from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
class ViaPointReacherEnv(BaseReacherDirectEnv):
@ -187,7 +186,6 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
if __name__ == "__main__":
import time
env = ViaPointReacherEnv(5)
env.reset()

View File

@ -1,9 +1,9 @@
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
from .ant_jump.ant_jump import AntJumpEnv
from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv
from .hopper_jump.hopper_jump import HopperJumpEnv
from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import HopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
from .reacher.reacher import ReacherEnv
from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
from .hopper_jump.hopper_jump import HopperJumpEnv

View File

@ -17,7 +17,7 @@ class AntJumpEnv(AntEnv):
"""
Initialization changes to normal Ant:
- healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
- ctrl_cost_weight 0.5 -> 0.0
- _ctrl_cost_weight 0.5 -> 0.0
- contact_cost_weight: 5e-4 -> 0.0
- healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')) !!!!! Does that make sense, limiting height?
"""

View File

@ -2,7 +2,7 @@ from typing import Union, Tuple
import numpy as np
from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
class MPWrapper(RawInterfaceWrapper):

Some files were not shown because too many files have changed in this diff.