renaming to fancy_gym

Fabian 2022-07-13 15:10:43 +02:00
parent d412fb229c
commit 8d1c1b44bf
179 changed files with 332 additions and 361 deletions

View File

@@ -1,13 +1,12 @@
-## ALR Robotics Control Environments
-This project offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
-We provide support (under the OpenAI interface) for the benchmark suites
+## Fancy Gym
+Fancy gym offers a large variety of reinforcement learning environments under the unifying interface
+of [OpenAI gym](https://gym.openai.com/). We provide support (under the OpenAI interface) for the benchmark suites
 [DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control)
-(DMC) and [Metaworld](https://meta-world.github.io/).
-Custom (Mujoco) gym environments can be created according
-to [this guide](https://www.gymlibrary.ml/content/environment_creation/).
-Unlike existing libraries, we additionally support to control agents with movement primitives, such as
-Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (ProMP, we only consider the mean usually).
+(DMC) and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environments can be created according
+to [this guide](https://www.gymlibrary.ml/content/environment_creation/). Unlike existing libraries, we additionally
+support controlling agents with movement primitives, such as Dynamic Movement Primitives (DMPs) and Probabilistic
+Movement Primitives (ProMP; we usually consider only the mean).
 ## Movement Primitive Environments (Episode-Based/Black-Box Environments)
@@ -59,14 +58,14 @@ pip install -e .
 ## Using the framework
-We prepared [multiple examples](alr_envs/examples/), please have a look there for more specific examples.
+We prepared [multiple examples](fancy_gym/examples/); please have a look there for more specific examples.
 ### Step-wise environments
 ```python
-import alr_envs
-env = alr_envs.make('HoleReacher-v0', seed=1)
+import fancy_gym
+env = fancy_gym.make('HoleReacher-v0', seed=1)
 state = env.reset()
 for i in range(1000):
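To make the truncated snippet above self-contained, here is a minimal step-wise rollout under the renamed package; the random-action policy and the render cadence are illustrative assumptions, not part of this diff:

```python
import fancy_gym

env = fancy_gym.make('HoleReacher-v0', seed=1)
state = env.reset()
for i in range(1000):
    # Random actions stand in for a policy (illustrative assumption).
    state, reward, done, info = env.step(env.action_space.sample())
    if i % 5 == 0:
        env.render()
    if done:
        state = env.reset()
```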
@@ -85,9 +84,9 @@ Existing MP tasks can be created the same way as above. Just keep in mind, calli
 trajectory.
 ```python
-import alr_envs
-env = alr_envs.make('HoleReacherProMP-v0', seed=1)
+import fancy_gym
+env = fancy_gym.make('HoleReacherProMP-v0', seed=1)
 # render() can be called once in the beginning with all necessary arguments. To turn it off again, just call render(None).
 env.render()
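For context, a complete episode-based rollout might look as follows; in this setting a single `step()` consumes one full ProMP parameter vector and executes the whole trajectory internally (the sampling loop below is an illustrative sketch, not part of the diff):

```python
import fancy_gym

env = fancy_gym.make('HoleReacherProMP-v0', seed=1)
env.render()  # set render options once; env.render(None) turns rendering off again
obs = env.reset()
for i in range(5):
    # One step = one full trajectory; the reward is aggregated over the rollout.
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
```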
@@ -104,19 +103,19 @@ To show all available environments, we provide some additional convenience. Each
 keys `DMP` and `ProMP` that store a list of available environment names.
 ```python
-import alr_envs
+import fancy_gym
 print("Custom MP tasks:")
-print(alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
+print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 print("OpenAI Gym MP tasks:")
-print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
+print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 print("Deepmind Control MP tasks:")
-print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
+print(fancy_gym.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 print("MetaWorld MP tasks:")
-print(alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
+print(fancy_gym.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 ```
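Since every suite-specific dictionary maps the keys `DMP` and `ProMP` to lists of ids, the merged `ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS` (built in `fancy_gym/__init__.py` further down in this commit) can be iterated uniformly; a small sketch:

```python
import fancy_gym

for mp_type, env_ids in fancy_gym.ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
    print(f"{mp_type}: {len(env_ids)} environments")
```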
 ### How to create a new MP task
@@ -181,12 +180,12 @@ class MPWrapper(MPEnvWrapper):
 ```
-If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well.
-Without the integration the task can still be used. A rough outline can be shown here, for more details we recommend
-having a look at the [examples](alr_envs/examples/).
+If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well. Without the
+integration the task can still be used. A rough outline is shown here; for more details we recommend having a look
+at the [examples](fancy_gym/examples/).
 ```python
-import alr_envs
+import fancy_gym
 # Base environment name, according to structure of above example
 base_env_id = "ball_in_cup-catch"
@@ -194,12 +193,12 @@ base_env_id = "ball_in_cup-catch"
 # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper.
 # You can also add other gym.Wrappers in case they are needed,
 # e.g. gym.wrappers.FlattenObservation for dict observations
-wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper]
+wrappers = [fancy_gym.dmc.suite.ball_in_cup.MPWrapper]
 mp_kwargs = {...}
 kwargs = {...}
-env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
+env = fancy_gym.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs)
 # OR for a deterministic ProMP (other traj_gen_kwargs are required):
-# env = alr_envs.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
+# env = fancy_gym.make_promp_env(base_env, wrappers=wrappers, seed=seed, traj_gen_kwargs=mp_args)
 rewards = 0
 obs = env.reset()
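A hedged continuation of the outline above (not part of the diff), showing how such an environment is typically rolled out once created:

```python
# Continuing the outline: each step consumes one MP parameter vector.
for i in range(5):
    obs, reward, done, info = env.step(env.action_space.sample())
    rewards += reward
    if done:
        print(rewards)
        rewards = 0
        obs = env.reset()
```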

View File

@@ -1,14 +1,13 @@
-from alr_envs import dmc, meta, open_ai
-from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
-from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
+from fancy_gym import dmc, meta, open_ai
+from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
 # Convenience function for all MP environments
 from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
+from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
+from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
     key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
-         ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
+         ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
          ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
     for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
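The comprehension above merges the per-suite dictionaries key-wise for the keys `DMP` and `ProMP`. A standalone sketch of the same pattern with toy data (not the real lists):

```python
# Toy illustration of the key-wise merge performed in __init__.py above.
custom = {"DMP": ["HoleReacherDMP-v0"], "ProMP": ["HoleReacherProMP-v0"]}
dmc = {"DMP": ["dmc_ball_in_cup-catch_dmp-v0"], "ProMP": ["dmc_ball_in_cup-catch_promp-v0"]}

merged = {key: value + dmc[key] for key, value in custom.items()}
print(merged["ProMP"])  # ['HoleReacherProMP-v0', 'dmc_ball_in_cup-catch_promp-v0']
```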

View File

@@ -1,4 +1,5 @@
 import os
 os.environ["MUJOCO_GL"] = "egl"
 from typing import Tuple, Optional
@@ -8,9 +9,9 @@ import numpy as np
 from gym import spaces
 from mp_pytorch.mp.mp_interfaces import MPInterface
-from alr_envs.black_box.controller.base_controller import BaseController
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-from alr_envs.utils.utils import get_numpy
+from fancy_gym.black_box.controller.base_controller import BaseController
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.utils.utils import get_numpy
 class BlackBoxWrapper(gym.ObservationWrapper):
@@ -77,12 +78,10 @@ class BlackBoxWrapper(gym.ObservationWrapper):
     def get_trajectory(self, action: np.ndarray) -> Tuple:
         clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
         self.traj_gen.set_params(clipped_params)
-        # TODO: Bruce said DMP, ProMP, ProDMP can have 0 bc_time for sequencing
-        # TODO Check with Bruce for replanning
+        # TODO: is this correct for replanning? Do we need to adjust anything here?
         self.traj_gen.set_boundary_conditions(
             bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
             bc_pos=self.current_pos, bc_vel=self.current_vel)
-        # TODO: is this correct for replanning? Do we need to adjust anything here?
         self.traj_gen.set_duration(None if self.learn_sub_trajectories else np.array([self.duration]),
                                    np.array([self.dt]))
         traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
@@ -99,9 +98,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
     def _get_action_space(self):
         """
-        This function can be used to modify the action space for considering actions which are not learned via motion
+        This function can be used to modify the action space for considering actions which are not learned via movement
         primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
-        motion primitive.
+        movement primitive.
         Only needs to be overwritten if the action space needs to be modified.
         """
         try:
@@ -138,7 +137,6 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         for t, (pos, vel) in enumerate(zip(trajectory, velocity)):
             step_action = self.tracking_controller.get_action(pos, vel, self.current_pos, self.current_vel)
             c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
-            # print('step/clipped action ratio: ', step_action/c_action)
             obs, c_reward, done, info = self.env.step(c_action)
             rewards[t] = c_reward
@@ -176,26 +174,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         """Only set render options here, such that they can be used during the rollout.
         This only needs to be called once"""
         self.render_kwargs = kwargs
-        # self.env.render(mode=self.render_mode, **self.render_kwargs)
-        # self.env.render(**self.render_kwargs)
     def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
         self.current_traj_steps = 0
         return super(BlackBoxWrapper, self).reset()
-    def plot_trajs(self, des_trajs, des_vels):
-        import matplotlib.pyplot as plt
-        import matplotlib
-        matplotlib.use('TkAgg')
-        pos_fig = plt.figure('positions')
-        vel_fig = plt.figure('velocities')
-        for i in range(des_trajs.shape[1]):
-            plt.figure(pos_fig.number)
-            plt.subplot(des_trajs.shape[1], 1, i + 1)
-            plt.plot(np.ones(des_trajs.shape[0]) * self.current_pos[i])
-            plt.plot(des_trajs[:, i])
-            plt.figure(vel_fig.number)
-            plt.subplot(des_vels.shape[1], 1, i + 1)
-            plt.plot(np.ones(des_trajs.shape[0]) * self.current_vel[i])
-            plt.plot(des_vels[:, i])

View File

@@ -1,6 +1,6 @@
 import numpy as np
-from alr_envs.black_box.controller.base_controller import BaseController
+from fancy_gym.black_box.controller.base_controller import BaseController
 class MetaWorldController(BaseController):

View File

@@ -1,6 +1,6 @@
 from typing import Union, Tuple
-from alr_envs.black_box.controller.base_controller import BaseController
+from fancy_gym.black_box.controller.base_controller import BaseController
 class PDController(BaseController):
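For orientation: a PD tracking controller of this kind converts desired positions and velocities from the trajectory generator into low-level actions, as used by `tracking_controller.get_action(...)` in the BlackBoxWrapper above. A minimal sketch of the usual control law (function name, signature, and default gains are illustrative; the diff only shows the import lines of the actual class):

```python
import numpy as np

def pd_action(des_pos, des_vel, cur_pos, cur_vel, p_gains=1.0, d_gains=0.1):
    # Classic PD law: correct the position error and damp the velocity error.
    return p_gains * (des_pos - cur_pos) + d_gains * (des_vel - cur_vel)
```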

View File

@@ -1,4 +1,4 @@
-from alr_envs.black_box.controller.base_controller import BaseController
+from fancy_gym.black_box.controller.base_controller import BaseController
 class PosController(BaseController):

View File

@@ -1,4 +1,4 @@
-from alr_envs.black_box.controller.base_controller import BaseController
+from fancy_gym.black_box.controller.base_controller import BaseController
 class VelController(BaseController):

View File

@@ -1,7 +1,7 @@
-from alr_envs.black_box.controller.meta_world_controller import MetaWorldController
-from alr_envs.black_box.controller.pd_controller import PDController
-from alr_envs.black_box.controller.vel_controller import VelController
-from alr_envs.black_box.controller.pos_controller import PosController
+from fancy_gym.black_box.controller.meta_world_controller import MetaWorldController
+from fancy_gym.black_box.controller.pd_controller import PDController
+from fancy_gym.black_box.controller.pos_controller import PosController
+from fancy_gym.black_box.controller.vel_controller import VelController
 ALL_TYPES = ["motor", "velocity", "position", "metaworld"]

View File

@@ -1,8 +1,7 @@
-from mp_pytorch.mp.dmp import DMP
-from mp_pytorch.mp.promp import ProMP
-from mp_pytorch.mp.idmp import IDMP
 from mp_pytorch.basis_gn.basis_generator import BasisGenerator
+from mp_pytorch.mp.dmp import DMP
+from mp_pytorch.mp.idmp import IDMP
+from mp_pytorch.mp.promp import ProMP
 ALL_TYPES = ["promp", "dmp", "idmp"]

View File

@@ -1,9 +1,9 @@
-from typing import Union, Tuple
-from mp_pytorch.mp.mp_interfaces import MPInterface
 from abc import abstractmethod
+from typing import Union, Tuple
 import gym
 import numpy as np
+from mp_pytorch.mp.mp_interfaces import MPInterface
 class RawInterfaceWrapper(gym.Wrapper):
@@ -55,7 +55,7 @@ class RawInterfaceWrapper(gym.Wrapper):
     def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
             np.ndarray, Union[np.ndarray, None]]:
         """
-        Used to extract the parameters for the motion primitive and other parameters from an action array which might
+        Used to extract the parameters for the movement primitive and other parameters from an action array which might
         include other actions like ball releasing time for the beer pong environment.
         This only needs to be overwritten if the action space is modified.
         Args:

View File

@@ -56,7 +56,7 @@ kwargs_dict_bic_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 kwargs_dict_bic_dmp['trajectory_generator_kwargs']['weight_scale'] = 10  # TODO: weight scale 1, but goal scale 0.1
 register(
     id=f'dmc_ball_in_cup-catch_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_bic_dmp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0")
@@ -66,7 +66,7 @@ kwargs_dict_bic_promp['name'] = f"dmc:ball_in_cup-catch"
 kwargs_dict_bic_promp['wrappers'].append(suite.ball_in_cup.MPWrapper)
 register(
     id=f'dmc_ball_in_cup-catch_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_bic_promp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_ball_in_cup-catch_promp-v0")
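Once registered, these DMC-based MP ids behave like any other environment id; an illustrative sketch (not part of the diff):

```python
import fancy_gym

# Registered above; created like any other id.
env = fancy_gym.make('dmc_ball_in_cup-catch_promp-v0', seed=1)
obs = env.reset()
```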
@@ -80,7 +80,7 @@ kwargs_dict_reacher_easy_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 kwargs_dict_reacher_easy_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 register(
     id=f'dmc_reacher-easy_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_bic_dmp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0")
@@ -91,7 +91,7 @@ kwargs_dict_reacher_easy_promp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_easy_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-easy_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_reacher_easy_promp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-easy_promp-v0")
@@ -105,7 +105,7 @@ kwargs_dict_reacher_hard_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 kwargs_dict_reacher_hard_dmp['trajectory_generator_kwargs']['weight_scale'] = 500
 register(
     id=f'dmc_reacher-hard_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_reacher_hard_dmp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0")
@@ -116,7 +116,7 @@ kwargs_dict_reacher_hard_promp['wrappers'].append(suite.reacher.MPWrapper)
 kwargs_dict_reacher_hard_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
 register(
     id=f'dmc_reacher-hard_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_reacher_hard_promp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_reacher-hard_promp-v0")
@@ -136,7 +136,7 @@ for _task in _dmc_cartpole_tasks:
     kwargs_dict_cartpole_dmp['controller_kwargs']['d_gains'] = 10
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_cartpole_dmp
     )
     ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@@ -150,7 +150,7 @@ for _task in _dmc_cartpole_tasks:
     kwargs_dict_cartpole_promp['trajectory_generator_kwargs']['weight_scale'] = 0.2
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_cartpole_promp
     )
     ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -167,7 +167,7 @@ kwargs_dict_cartpole2poles_dmp['controller_kwargs']['d_gains'] = 10
 _env_id = f'dmc_cartpole-two_poles_dmp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_cartpole2poles_dmp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@@ -181,7 +181,7 @@ kwargs_dict_cartpole2poles_promp['trajectory_generator_kwargs']['weight_scale']
 _env_id = f'dmc_cartpole-two_poles_promp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_cartpole2poles_promp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -198,7 +198,7 @@ kwargs_dict_cartpole3poles_dmp['controller_kwargs']['d_gains'] = 10
 _env_id = f'dmc_cartpole-three_poles_dmp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_cartpole3poles_dmp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@@ -212,7 +212,7 @@ kwargs_dict_cartpole3poles_promp['trajectory_generator_kwargs']['weight_scale']
 _env_id = f'dmc_cartpole-three_poles_promp-v0'
 register(
     id=_env_id,
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_cartpole3poles_promp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -227,7 +227,7 @@ kwargs_dict_mani_reach_site_features_dmp['trajectory_generator_kwargs']['weight_
 kwargs_dict_mani_reach_site_features_dmp['controller_kwargs']['controller_type'] = 'velocity'
 register(
     id=f'dmc_manipulation-reach_site_dmp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_mani_reach_site_features_dmp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0")
@@ -239,7 +239,7 @@ kwargs_dict_mani_reach_site_features_promp['trajectory_generator_kwargs']['weigh
 kwargs_dict_mani_reach_site_features_promp['controller_kwargs']['controller_type'] = 'velocity'
 register(
     id=f'dmc_manipulation-reach_site_promp-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_mani_reach_site_features_promp
 )
 ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("dmc_manipulation-reach_site_promp-v0")

View File

@@ -5,9 +5,9 @@ import collections
 from collections.abc import MutableMapping
 from typing import Any, Dict, Tuple, Optional, Union, Callable
-from dm_control import composer
 import gym
 import numpy as np
+from dm_control import composer
 from dm_control.rl import control
 from dm_env import specs
 from gym import spaces

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -1,3 +1,3 @@
 from .mp_wrapper import MPWrapper
-from .mp_wrapper import TwoPolesMPWrapper
 from .mp_wrapper import ThreePolesMPWrapper
+from .mp_wrapper import TwoPolesMPWrapper

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -8,13 +8,13 @@ from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv
 from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv
 from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv
 from .mujoco.ant_jump.ant_jump import MAX_EPISODE_STEPS_ANTJUMP
+from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
 from .mujoco.half_cheetah_jump.half_cheetah_jump import MAX_EPISODE_STEPS_HALFCHEETAHJUMP
 from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
 from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
 from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
-from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
-from .mujoco.reacher.reacher import ReacherEnv
+from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
 ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
@@ -64,7 +64,7 @@ DEFAULT_BB_DICT_DMP = {
 ## Simple Reacher
 register(
     id='SimpleReacher-v0',
-    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 2,
@@ -73,7 +73,7 @@ register(
 register(
     id='LongSimpleReacher-v0',
-    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -84,7 +84,7 @@ register(
 register(
     id='ViaPointReacher-v0',
-    entry_point='alr_envs.envs.classic_control:ViaPointReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:ViaPointReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -96,7 +96,7 @@ register(
 ## Hole Reacher
 register(
     id='HoleReacher-v0',
-    entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -112,12 +112,12 @@ register(
 # Mujoco
-## Reacher
+## Mujoco Reacher
 for _dims in [5, 7]:
     register(
         id=f'Reacher{_dims}d-v0',
-        entry_point='alr_envs.envs.mujoco:ReacherEnv',
-        max_episode_steps=200,
+        entry_point='fancy_gym.envs.mujoco:ReacherEnv',
+        max_episode_steps=MAX_EPISODE_STEPS_REACHER,
         kwargs={
             "n_links": _dims,
         }
@@ -125,17 +125,18 @@ for _dims in [5, 7]:
     register(
         id=f'Reacher{_dims}dSparse-v0',
-        entry_point='alr_envs.envs.mujoco:ReacherEnv',
-        max_episode_steps=200,
+        entry_point='fancy_gym.envs.mujoco:ReacherEnv',
+        max_episode_steps=MAX_EPISODE_STEPS_REACHER,
         kwargs={
             "sparse": True,
+            'reward_weight': 200,
             "n_links": _dims,
         }
     )
 register(
     id='HopperJumpSparse-v0',
-    entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
+    entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "sparse": True,
@@ -144,7 +145,7 @@ register(
 register(
     id='HopperJump-v0',
-    entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
+    entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "sparse": False,
@@ -156,43 +157,43 @@ register(
 register(
     id='AntJump-v0',
-    entry_point='alr_envs.envs.mujoco:AntJumpEnv',
+    entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
 )
 register(
     id='HalfCheetahJump-v0',
-    entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv',
+    entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
 )
 register(
     id='HopperJumpOnBox-v0',
-    entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
+    entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
 )
 register(
-    id='ALRHopperThrow-v0',
-    entry_point='alr_envs.envs.mujoco:HopperThrowEnv',
+    id='HopperThrow-v0',
+    entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
 )
 register(
-    id='ALRHopperThrowInBasket-v0',
-    entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv',
+    id='HopperThrowInBasket-v0',
+    entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
 )
 register(
-    id='ALRWalker2DJump-v0',
-    entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv',
+    id='Walker2DJump-v0',
+    entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
 )
 register(
     id='BeerPong-v0',
-    entry_point='alr_envs.envs.mujoco:BeerPongEnv',
+    entry_point='fancy_gym.envs.mujoco:BeerPongEnv',
     max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )
@@ -200,18 +201,18 @@ register(
 # only one time step, i.e. we simulate until the end of the episode
 register(
     id='BeerPongStepBased-v0',
-    entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
+    entry_point='fancy_gym.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
     max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )
 # Beerpong with episodic reward, but fixed release time step
 register(
     id='BeerPongFixedRelease-v0',
-    entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
+    entry_point='fancy_gym.envs.mujoco:BeerPongEnvFixedReleaseStep',
     max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )
-# Motion Primitive Environments
+# Movement Primitive Environments
 ## Simple Reacher
 _versions = ["SimpleReacher-v0", "LongSimpleReacher-v0"]
@@ -227,7 +228,7 @@ for _v in _versions:
     kwargs_dict_simple_reacher_dmp['name'] = f"{_v}"
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_simple_reacher_dmp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
@@ -240,7 +241,7 @@ for _v in _versions:
     kwargs_dict_simple_reacher_promp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_simple_reacher_promp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -254,7 +255,7 @@ kwargs_dict_via_point_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
 kwargs_dict_via_point_reacher_dmp['name'] = "ViaPointReacher-v0"
 register(
     id='ViaPointReacherDMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     # max_episode_steps=1,
     kwargs=kwargs_dict_via_point_reacher_dmp
 )
@@ -266,7 +267,7 @@ kwargs_dict_via_point_reacher_promp['controller_kwargs']['controller_type'] = 'v
 kwargs_dict_via_point_reacher_promp['name'] = "ViaPointReacher-v0"
 register(
     id="ViaPointReacherProMP-v0",
-    entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+    entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_via_point_reacher_promp
 )
 ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
@@ -285,7 +286,7 @@ for _v in _versions:
     kwargs_dict_hole_reacher_dmp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         # max_episode_steps=1,
         kwargs=kwargs_dict_hole_reacher_dmp
     )
@@ -299,7 +300,7 @@ for _v in _versions:
     kwargs_dict_hole_reacher_promp['name'] = f"{_v}"
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_hole_reacher_promp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -309,30 +310,26 @@ _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSpa
 for _v in _versions:
     _name = _v.split("-")
     _env_id = f'{_name[0]}DMP-{_name[1]}'
-    kwargs_dict_reacherNd_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
-    kwargs_dict_reacherNd_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
-    kwargs_dict_reacherNd_dmp['trajectory_generator_kwargs']['weight_scale'] = 5
-    kwargs_dict_reacherNd_dmp['phase_generator_kwargs']['alpha_phase'] = 2
-    kwargs_dict_reacherNd_dmp['basis_generator_kwargs']['num_basis'] = 2
-    kwargs_dict_reacherNd_dmp['name'] = _v
+    kwargs_dict_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
+    kwargs_dict_reacher_dmp['wrappers'].append(mujoco.reacher.MPWrapper)
+    kwargs_dict_reacher_dmp['phase_generator_kwargs']['alpha_phase'] = 2
+    kwargs_dict_reacher_dmp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         # max_episode_steps=1,
-        kwargs=kwargs_dict_reacherNd_dmp
+        kwargs=kwargs_dict_reacher_dmp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
-    kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
-    kwargs_dict_alr_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
-    kwargs_dict_alr_reacher_promp['controller_kwargs']['p_gains'] = 1
-    kwargs_dict_alr_reacher_promp['controller_kwargs']['d_gains'] = 0.1
-    kwargs_dict_alr_reacher_promp['name'] = _v
+    kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
+    kwargs_dict_reacher_promp['wrappers'].append(mujoco.reacher.MPWrapper)
+    kwargs_dict_reacher_promp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
-        kwargs=kwargs_dict_alr_reacher_promp
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
+        kwargs=kwargs_dict_reacher_promp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -352,7 +349,7 @@ for _v in _versions:
     kwargs_dict_bp_promp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_bp_promp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -372,7 +369,7 @@ for _v in _versions:
     kwargs_dict_bp_promp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_bp_promp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -384,7 +381,7 @@ for _v in _versions:
 # ########################################################################################################################
 #
 # ## AntJump
-# _versions = ['ALRAntJump-v0']
+# _versions = ['AntJump-v0']
 # for _v in _versions:
 #     _name = _v.split("-")
 #     _env_id = f'{_name[0]}ProMP-{_name[1]}'
@@ -393,7 +390,7 @@ for _v in _versions:
 #     kwargs_dict_ant_jump_promp['name'] = _v
 #     register(
 #         id=_env_id,
-#         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+#         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_ant_jump_promp
 #     )
 #     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -401,7 +398,7 @@ for _v in _versions:
 # ########################################################################################################################
 #
 # ## HalfCheetahJump
-# _versions = ['ALRHalfCheetahJump-v0']
+# _versions = ['HalfCheetahJump-v0']
 # for _v in _versions:
 #     _name = _v.split("-")
 #     _env_id = f'{_name[0]}ProMP-{_name[1]}'
@@ -410,7 +407,7 @@ for _v in _versions:
 #     kwargs_dict_halfcheetah_jump_promp['name'] = _v
 #     register(
 #         id=_env_id,
-#         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+#         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_halfcheetah_jump_promp
 #     )
 #     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -420,7 +417,7 @@ for _v in _versions:
 ## HopperJump
 _versions = ['HopperJump-v0', 'HopperJumpSparse-v0',
-             # 'ALRHopperJumpOnBox-v0', 'ALRHopperThrow-v0', 'ALRHopperThrowInBasket-v0'
+             # 'HopperJumpOnBox-v0', 'HopperThrow-v0', 'HopperThrowInBasket-v0'
              ]
 # TODO: Check if all environments work with the same MPWrapper
 for _v in _versions:
@@ -431,7 +428,7 @@ for _v in _versions:
     kwargs_dict_hopper_jump_promp['name'] = _v
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_hopper_jump_promp
     )
     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -440,7 +437,7 @@ for _v in _versions:
 #
 #
 # ## Walker2DJump
-# _versions = ['ALRWalker2DJump-v0']
+# _versions = ['Walker2DJump-v0']
 # for _v in _versions:
 #     _name = _v.split("-")
 #     _env_id = f'{_name[0]}ProMP-{_name[1]}'
@@ -449,7 +446,7 @@ for _v in _versions:
 #     kwargs_dict_walker2d_jump_promp['name'] = _v
 #     register(
 #         id=_env_id,
-#         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
+#         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_walker2d_jump_promp
 #     )
 #     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
@@ -458,7 +455,7 @@ for _v in _versions:
 """
 register(
     id='SimpleReacher-v1',
-    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 2,
@@ -468,7 +465,7 @@ register(
 register(
     id='LongSimpleReacher-v1',
-    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -477,7 +474,7 @@ register(
 )
 register(
     id='HoleReacher-v1',
-    entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -492,7 +489,7 @@ register(
 )
 register(
     id='HoleReacher-v2',
-    entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
+    entry_point='fancy_gym.envs.classic_control:HoleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -508,8 +505,8 @@ register(
 # CtxtFree are v0, Contextual are v1
 register(
-    id='ALRAntJump-v0',
-    entry_point='alr_envs.envs.mujoco:AntJumpEnv',
+    id='AntJump-v0',
+    entry_point='fancy_gym.envs.mujoco:AntJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
@@ -518,8 +515,8 @@ register(
 )
 # CtxtFree are v0, Contextual are v1
 register(
-    id='ALRHalfCheetahJump-v0',
-    entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv',
+    id='HalfCheetahJump-v0',
+    entry_point='fancy_gym.envs.mujoco:HalfCheetahJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
@@ -527,8 +524,8 @@ register(
     }
 )
 register(
-    id='ALRHopperJump-v0',
-    entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
+    id='HopperJump-v0',
+    entry_point='fancy_gym.envs.mujoco:HopperJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
@@ -546,26 +543,26 @@ for i in _vs:
     _env_id = f'ALRReacher{i}-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.envs.mujoco:ReacherEnv',
+        entry_point='fancy_gym.envs.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "steps_before_reward": 0,
             "n_links": 5,
             "balance": False,
-            'ctrl_cost_weight': i
+            '_ctrl_cost_weight': i
         }
     )
     _env_id = f'ALRReacherSparse{i}-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.envs.mujoco:ReacherEnv',
+        entry_point='fancy_gym.envs.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "steps_before_reward": 200,
             "n_links": 5,
             "balance": False,
-            'ctrl_cost_weight': i
+            '_ctrl_cost_weight': i
         }
     )
 _vs = np.arange(101).tolist() + [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1]
@@ -573,7 +570,7 @@ for i in _vs:
     _env_id = f'ALRReacher{i}ProMP-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
         kwargs={
             "name": f"{_env_id.replace('ProMP', '')}",
             "wrappers": [mujoco.reacher.MPWrapper],
@@ -596,7 +593,7 @@ for i in _vs:
     _env_id = f'ALRReacherSparse{i}ProMP-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        entry_point='fancy_gym.utils.make_env_helpers:make_promp_env_helper',
         kwargs={
             "name": f"{_env_id.replace('ProMP', '')}",
             "wrappers": [mujoco.reacher.MPWrapper],
@@ -617,8 +614,8 @@ for i in _vs:
 )
 register(
-    id='ALRHopperJumpOnBox-v0',
-    entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
+    id='HopperJumpOnBox-v0',
+    entry_point='fancy_gym.envs.mujoco:HopperJumpOnBoxEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
@@ -626,8 +623,8 @@ for i in _vs:
     }
 )
 register(
-    id='ALRHopperThrow-v0',
-    entry_point='alr_envs.envs.mujoco:HopperThrowEnv',
+    id='HopperThrow-v0',
+    entry_point='fancy_gym.envs.mujoco:HopperThrowEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
@@ -635,8 +632,8 @@ for i in _vs:
     }
 )
 register(
-    id='ALRHopperThrowInBasket-v0',
-    entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv',
+    id='HopperThrowInBasket-v0',
+    entry_point='fancy_gym.envs.mujoco:HopperThrowInBasketEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
@@ -644,8 +641,8 @@ for i in _vs:
     }
 )
 register(
-    id='ALRWalker2DJump-v0',
-    entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv',
+    id='Walker2DJump-v0',
+    entry_point='fancy_gym.envs.mujoco:Walker2dJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
@@ -653,13 +650,13 @@ for i in _vs:
     }
 )
 register(id='TableTennis2DCtxt-v1',
-         entry_point='alr_envs.envs.mujoco:TTEnvGym',
+         entry_point='fancy_gym.envs.mujoco:TTEnvGym',
         max_episode_steps=MAX_EPISODE_STEPS,
         kwargs={'ctxt_dim': 2, 'fixed_goal': True})
 register(
-    id='ALRBeerPong-v0',
-    entry_point='alr_envs.envs.mujoco:ALRBeerBongEnv',
+    id='BeerPong-v0',
+    entry_point='fancy_gym.envs.mujoco:BeerBongEnv',
     max_episode_steps=300,
     kwargs={
         "rndm_goal": False,

View File

@@ -7,7 +7,7 @@ from gym import spaces
 from gym.core import ObsType
 from gym.utils import seeding
-from alr_envs.envs.classic_control.utils import intersect
+from fancy_gym.envs.classic_control.utils import intersect
 class BaseReacherEnv(gym.Env, ABC):

View File

@@ -1,14 +1,16 @@
 from abc import ABC
-from gym import spaces
 import numpy as np
-from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
+from gym import spaces
+from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
 class BaseReacherDirectEnv(BaseReacherEnv, ABC):
     """
     Base class for directly controlled reaching environments
     """
     def __init__(self, n_links: int, random_start: bool = True,
                  allow_self_collision: bool = False):
         super().__init__(n_links, random_start, allow_self_collision)

View File

@@ -1,14 +1,16 @@
 from abc import ABC
-from gym import spaces
 import numpy as np
-from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
+from gym import spaces
+from fancy_gym.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
 class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
     """
     Base class for torque controlled reaching environments
     """
     def __init__(self, n_links: int, random_start: bool = True,
                  allow_self_collision: bool = False):
         super().__init__(n_links, random_start, allow_self_collision)

View File

@@ -6,7 +6,9 @@ import numpy as np
 from gym.core import ObsType
 from matplotlib import patches
-from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
+from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
+MAX_EPISODE_STEPS_HOLEREACHER = 200
 class HoleReacherEnv(BaseReacherDirectEnv):
@@ -41,13 +43,13 @@ class HoleReacherEnv(BaseReacherDirectEnv):
         self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
         if rew_fct == "simple":
-            from alr_envs.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
+            from fancy_gym.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
         elif rew_fct == "vel_acc":
-            from alr_envs.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
+            from fancy_gym.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
         elif rew_fct == "unbounded":
-            from alr_envs.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
+            from fancy_gym.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
         else:
             raise ValueError("Unknown reward function {}".format(rew_fct))
@@ -224,7 +226,6 @@ class HoleReacherEnv(BaseReacherDirectEnv):
 if __name__ == "__main__":
-    import time
     env = HoleReacherEnv(5)
     env.reset()

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -5,7 +5,7 @@ import numpy as np
 from gym import spaces
 from gym.core import ObsType
-from alr_envs.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
+from fancy_gym.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
 class SimpleReacherEnv(BaseReacherTorqueEnv):

View File

@@ -1,6 +1,3 @@
-import numpy as np
 def ccw(A, B, C):
     return (C[1] - A[1]) * (B[0] - A[0]) - (B[1] - A[1]) * (C[0] - A[0]) > 1e-12
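`ccw` is the standard counter-clockwise orientation predicate. For orientation, the `intersect` helper imported elsewhere in this commit is presumably built from it in the usual way; a hedged sketch (not part of the diff):

```python
def intersect(A, B, C, D):
    # Segments AB and CD intersect iff A and B lie on opposite sides of CD
    # and C and D lie on opposite sides of AB.
    return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
```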

View File

@@ -2,7 +2,7 @@ from typing import Tuple, Union
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

View File

@@ -4,9 +4,8 @@ import gym
 import matplotlib.pyplot as plt
 import numpy as np
 from gym.core import ObsType
-from gym.utils import seeding
-from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
+from fancy_gym.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
 class ViaPointReacherEnv(BaseReacherDirectEnv):
@@ -187,7 +186,6 @@ class ViaPointReacherEnv(BaseReacherDirectEnv):
 if __name__ == "__main__":
-    import time
     env = ViaPointReacherEnv(5)
     env.reset()

View File

@@ -1,9 +1,9 @@
-from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
 from .ant_jump.ant_jump import AntJumpEnv
+from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward
 from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv
+from .hopper_jump.hopper_jump import HopperJumpEnv
 from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv
 from .hopper_throw.hopper_throw import HopperThrowEnv
 from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv
 from .reacher.reacher import ReacherEnv
 from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv
-from .hopper_jump.hopper_jump import HopperJumpEnv

View File

@@ -17,7 +17,7 @@ class AntJumpEnv(AntEnv):
     """
     Initialization changes to normal Ant:
     - healthy_reward: 1.0 -> 0.01 -> 0.0 no healthy reward needed - Paul and Marc
-    - ctrl_cost_weight 0.5 -> 0.0
+    - _ctrl_cost_weight 0.5 -> 0.0
    - contact_cost_weight: 5e-4 -> 0.0
    - healthy_z_range: (0.2, 1.0) -> (0.3, float('inf')) !!!!! Does that make sense, limiting height?
    """

View File

@@ -2,7 +2,7 @@ from typing import Union, Tuple
 import numpy as np
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
+from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper
 class MPWrapper(RawInterfaceWrapper):

Some files were not shown because too many files have changed in this diff.