diff --git a/.gitignore b/.gitignore index 79c652b..ec01816 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,5 @@ venv.bak/ #configs /configs/db.cfg +legacy/ +MUJOCO_LOG.TXT diff --git a/MUJOCO_LOG.TXT b/MUJOCO_LOG.TXT deleted file mode 100644 index d398e34..0000000 --- a/MUJOCO_LOG.TXT +++ /dev/null @@ -1,15 +0,0 @@ -Fri Aug 28 14:41:56 2020 -ERROR: GLEW initalization error: Missing GL version - -Fri Aug 28 14:59:14 2020 -ERROR: GLEW initalization error: Missing GL version - -Fri Aug 28 15:03:43 2020 -ERROR: GLEW initalization error: Missing GL version - -Fri Aug 28 15:07:03 2020 -ERROR: GLEW initalization error: Missing GL version - -Fri Aug 28 15:15:01 2020 -ERROR: GLEW initalization error: Missing GL version - diff --git a/README.md b/README.md index 41d87f2..860bddc 100644 --- a/README.md +++ b/README.md @@ -1,87 +1,212 @@ -## ALR Environments - -This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc. -Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). -For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation. -We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. -When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments in to DMP tasks. +## ALR Robotics Control Environments -## Environments -Currently we have the following environments: +This project offers a large verity of reinforcement learning environments under a unifying interface base on OpenAI gym. +Besides, some custom environments we also provide support for the benchmark suites +[OpenAI gym](https://gym.openai.com/), +[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) +(DMC), and [Metaworld](https://meta-world.github.io/). Custom (Mujoco) gym environment can be created according +to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). Unlike existing libraries, we +further support to control agents with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP, +we only consider the mean usually). -### Mujoco +## Motion Primitive Environments (Episodic environments) -|Name| Description|Horizon|Action Dimension|Observation Dimension -|---|---|---|---|---| -|`ALRReacher-v0`|Modified (5 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 5 | 21 -|`ALRReacherSparse-v0`|Same as `ALRReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 5 | 21 -|`ALRReacherSparseBalanced-v0`|Same as `ALRReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 5 | 21 -|`ALRLongReacher-v0`|Modified (7 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 7 | 27 -|`ALRLongReacherSparse-v0`|Same as `ALRLongReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 7 | 27 -|`ALRLongReacherSparseBalanced-v0`|Same as `ALRLongReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 7 | 27 -|`ALRBallInACupSimple-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector. | 4000 | 3 | wip -|`ALRBallInACup-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector | 4000 | 7 | wip -|`ALRBallInACupGoal-v0`| Similiar to `ALRBallInACupSimple-v0` but the ball needs to be caught at a specified goal position | 4000 | 7 | wip - -### Classic Control +Unlike step-based environments, motion primitive (MP) environments are closer related to stochastic search, black box +optimization and methods that often used in robotics. MP environments are trajectory-based and always execute a full +trajectory, which is generated by a Dynamic Motion Primitive (DMP) or a Probabilistic Motion Primitive (DetPMP). The +generated trajectory is translated into individual step-wise actions by a controller. The exact choice of controller is, +however, dependent on the type of environment. We currently support position, velocity, and PD-Controllers for position, +velocity and torque control, respectively. The goal of all MP environments is still to learn a policy. Yet, an action +represents the parametrization of the motion primitives to generate a suitable trajectory. Additionally, in this +framework we support the above setting for the contextual setting, for which we expose all changing substates of the +task as a single observation in the beginning. This requires to predict a new action/MP parametrization for each +trajectory. All environments provide the next to the cumulative episode reward also all collected information from each +step as part of the info dictionary. This information should, however, mainly be used for debugging and logging. -|Name| Description|Horizon|Action Dimension|Observation Dimension -|---|---|---|---|---| -|`SimpleReacher-v0`| Simple reaching task (2 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 2 | 9 -|`LongSimpleReacher-v0`| Simple reaching task (5 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 5 | 18 -|`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18 -|`HoleReacher-v0`| 5 link reaching task where the end-effector needs to reach into a narrow hole without collding with itself or walls | 200 | 5 | 18 +|Key| Description| +|---|---| +`trajectory`| Generated trajectory from MP +`step_actions`| Step-wise executed action based on controller output +`step_observations`| Step-wise intermediate observations +`step_rewards`| Step-wise rewards +`trajectory_length`| Total number of environment interactions +`other`| All other information from the underlying environment are returned as a list with length `trajectory_length` maintaining the original key. In case some information are not provided every time step, the missing values are filled with `None`. -### DMP Environments -These environments are closer to stochastic search. They always execute a full trajectory, which is computed by a DMP and executed by a controller, e.g. a PD controller. -The goal is to learn the parameters of this DMP to generate a suitable trajectory. -All environments provide the full episode reward and additional information about early terminations, e.g. due to collisions. +## Installation -|Name| Description|Horizon|Action Dimension|Context Dimension -|---|---|---|---|---| -|`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25 -|`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25 -|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30 -|`ALRBallInACupSimpleDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupSimple-v0` task where only 3 joints are actuated. | 4000 | 15 -|`ALRBallInACupDMP-v0`| A DMP provides a trajectory for the `ALRBallInACup-v0` task. | 4000 | 35 -|`ALRBallInACupGoalDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupGoal-v0` task. | 4000 | 35 | 3 +1. Clone the repository -[//]: |`HoleReacherDetPMP-v0`| - -### Stochastic Search -|Name| Description|Horizon|Action Dimension|Observation Dimension -|---|---|---|---|---| -|`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. | 1 | `{dim}` | 0 - - -## Install -1. Clone the repository ```bash git clone git@github.com:ALRhub/alr_envs.git ``` -2. Go to the folder + +2. Go to the folder + ```bash cd alr_envs ``` -3. Install with + +3. Install with + ```bash pip install -e . ``` -4. Use (see [example.py](./example.py)): -```python -import gym -env = gym.make('alr_envs:SimpleReacher-v0') +## Using the framework + +We prepared [multiple examples](alr_envs/examples/), please have a look there for more specific examples. + +### Step-wise environments + +```python +import alr_envs + +env = alr_envs.make('HoleReacher-v0', seed=1) state = env.reset() -for i in range(10000): +for i in range(1000): state, reward, done, info = env.step(env.action_space.sample()) if i % 5 == 0: env.render() if done: state = env.reset() - ``` -For an example using a DMP wrapped env and asynchronous sampling look at [mp_env_async_sampler.py](./alr_envs/utils/mp_env_async_sampler.py) \ No newline at end of file +For Deepmind control tasks we expect the `env_id` to be specified as `domain_name-task_name` or for manipulation tasks +as `manipulation-environment_name`. All other environments can be created based on their original name. + +Existing MP tasks can be created the same way as above. Just keep in mind, calling `step()` always executs a full +trajectory. + +```python +import alr_envs + +env = alr_envs.make('HoleReacherDetPMP-v0', seed=1) +# render() can be called once in the beginning with all necessary arguments. To turn it of again just call render(None). +env.render() + +state = env.reset() + +for i in range(5): + state, reward, done, info = env.step(env.action_space.sample()) + + # Not really necessary as the environments resets itself after each trajectory anyway. + state = env.reset() +``` + +To show all available environments, we provide some additional convenience. Each value will return a dictionary with two +keys `DMP` and `DetPMP` that store a list of available environment names. + +```python +import alr_envs + +print("Custom MP tasks:") +print(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS) + +print("OpenAI Gym MP tasks:") +print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS) + +print("Deepmind Control MP tasks:") +print(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS) + +print("MetaWorld MP tasks:") +print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS) +``` + +### How to create a new MP task + +In case a required task is not supported yet in the MP framework, it can be created relatively easy. For the task at +hand, the following interface needs to be implemented. + +```python +import numpy as np +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + """ + Returns boolean mask for each substate in the full observation. + It determines whether the observation is returned for the contextual case or not. + This effectively allows to filter unwanted or unnecessary observations from the full step-based case. + E.g. Velocities starting at 0 are only changing after the first action. Given we only receive the first + observation, the velocities are not necessary in the observation for the MP task. + """ + return np.ones(self.observation_space.shape, dtype=bool) + + @property + def current_vel(self): + """ + Returns the current velocity of the action/control dimension. + The dimensionality has to match the action/control dimension. + This is not required when exclusively using position control, + it should, however, be implemented regardless. + E.g. The joint velocities that are directly or indirectly controlled by the action. + """ + raise NotImplementedError() + + @property + def current_pos(self): + """ + Returns the current position of the action/control dimension. + The dimensionality has to match the action/control dimension. + This is not required when exclusively using velocity control, + it should, however, be implemented regardless. + E.g. The joint positions that are directly or indirectly controlled by the action. + """ + raise NotImplementedError() + + @property + def goal_pos(self): + """ + Returns a predefined final position of the action/control dimension. + This is only required for the DMP and is most of the time learned instead. + """ + raise NotImplementedError() + + @property + def dt(self): + """ + Returns the time between two simulated steps of the environment + """ + raise NotImplementedError() + +``` + +If you created a new task wrapper, feel free to open a PR, so we can integrate it for others to use as well. +Without the integration the task can still be used. A rough outline can be shown here, for more details we recommend +having a look at the [examples](alr_envs/examples/). + +```python +import alr_envs + +# Base environment name, according to structure of above example +base_env_id = "ball_in_cup-catch" + +# Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. +# You can also add other gym.Wrappers in case they are needed, +# e.g. gym.wrappers.FlattenObservation for dict observations +wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper] +mp_kwargs = {...} +kwargs = {...} +env = alr_envs.make_dmp_env(base_env_id, wrappers=wrappers, seed=1, mp_kwargs=mp_kwargs, **kwargs) +# OR for a deterministic ProMP (other mp_kwargs are required): +# env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) + +rewards = 0 +obs = env.reset() + +# number of samples/full trajectories (multiple environment steps) +for i in range(5): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(base_env_id, rewards) + rewards = 0 + obs = env.reset() +``` diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py new file mode 100644 index 0000000..179839c --- /dev/null +++ b/alr_envs/alr/__init__.py @@ -0,0 +1,329 @@ +from gym import register + +from . import classic_control, mujoco +from .classic_control.hole_reacher.hole_reacher import HoleReacherEnv +from .classic_control.simple_reacher.simple_reacher import SimpleReacherEnv +from .classic_control.viapoint_reacher.viapoint_reacher import ViaPointReacherEnv +from .mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv +from .mujoco.ball_in_a_cup.biac_pd import ALRBallInACupPDEnv +from .mujoco.reacher.alr_reacher import ALRReacherEnv +from .mujoco.reacher.balancing import BalancingEnv + +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} + +# Classic Control +## Simple Reacher +register( + id='SimpleReacher-v0', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 2, + } +) + +register( + id='SimpleReacher-v1', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 2, + "random_start": False + } +) + +register( + id='LongSimpleReacher-v0', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + } +) + +register( + id='LongSimpleReacher-v1', + entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": False + } +) + +## Viapoint Reacher + +register( + id='ViaPointReacher-v0', + entry_point='alr_envs.alr.classic_control:ViaPointReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "allow_self_collision": False, + "collision_penalty": 1000 + } +) + +## Hole Reacher +register( + id='HoleReacher-v0', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": True, + "allow_self_collision": False, + "allow_wall_collision": False, + "hole_width": None, + "hole_depth": 1, + "hole_x": None, + "collision_penalty": 100, + } +) + +register( + id='HoleReacher-v1', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": False, + "allow_self_collision": False, + "allow_wall_collision": False, + "hole_width": 0.25, + "hole_depth": 1, + "hole_x": None, + "collision_penalty": 100, + } +) + +register( + id='HoleReacher-v2', + entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + "random_start": False, + "allow_self_collision": False, + "allow_wall_collision": False, + "hole_width": 0.25, + "hole_depth": 1, + "hole_x": 2, + "collision_penalty": 100, + } +) + +# Mujoco + +## Reacher +register( + id='ALRReacher-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 5, + "balance": False, + } +) + +register( + id='ALRReacherSparse-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": False, + } +) + +register( + id='ALRReacherSparseBalanced-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 5, + "balance": True, + } +) + +register( + id='ALRLongReacher-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 0, + "n_links": 7, + "balance": False, + } +) + +register( + id='ALRLongReacherSparse-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 7, + "balance": False, + } +) + +register( + id='ALRLongReacherSparseBalanced-v0', + entry_point='alr_envs.alr.mujoco:ALRReacherEnv', + max_episode_steps=200, + kwargs={ + "steps_before_reward": 200, + "n_links": 7, + "balance": True, + } +) + +## Balancing Reacher + +register( + id='Balancing-v0', + entry_point='alr_envs.alr.mujoco:BalancingEnv', + max_episode_steps=200, + kwargs={ + "n_links": 5, + } +) + +# Motion Primitive Environments + +## Simple Reacher +_versions = ["SimpleReacher-v0", "SimpleReacher-v1", "LongSimpleReacher-v0", "LongSimpleReacher-v1"] +for _v in _versions: + _name = _v.split("-") + _env_id = f'{_name[0]}DMP-{_name[1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"alr_envs:{_v}", + "wrappers": [classic_control.simple_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2 if "long" not in _v.lower() else 5, + "num_basis": 5, + "duration": 20, + "alpha_phase": 2, + "learn_goal": True, + "policy_type": "velocity", + "weights_scale": 50, + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + + _env_id = f'{_name[0]}DetPMP-{_name[1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"alr_envs:{_v}", + "wrappers": [classic_control.simple_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2 if "long" not in _v.lower() else 5, + "num_basis": 5, + "duration": 20, + "width": 0.025, + "policy_type": "velocity", + "weights_scale": 0.2, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +# Viapoint reacher +register( + id='ViaPointReacherDMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": "alr_envs:ViaPointReacher-v0", + "wrappers": [classic_control.viapoint_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "policy_type": "velocity", + "weights_scale": 50, + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") + +register( + id='ViaPointReacherDetPMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": "alr_envs:ViaPointReacher-v0", + "wrappers": [classic_control.viapoint_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "width": 0.025, + "policy_type": "velocity", + "weights_scale": 0.2, + "zero_start": True + } + } +) +ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ViaPointReacherDetPMP-v0") + +## Hole Reacher +_versions = ["v0", "v1", "v2"] +for _v in _versions: + _env_id = f'HoleReacherDMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"alr_envs:HoleReacher-{_v}", + "wrappers": [classic_control.hole_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + + _env_id = f'HoleReacherDetPMP-{_v}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"alr_envs:HoleReacher-{_v}", + "wrappers": [classic_control.hole_reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "width": 0.025, + "policy_type": "velocity", + "weights_scale": 0.2, + "zero_start": True + } + } + ) + ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/alr/classic_control/README.MD new file mode 100644 index 0000000..ebe2101 --- /dev/null +++ b/alr_envs/alr/classic_control/README.MD @@ -0,0 +1,21 @@ +### Classic Control + +## Step-based Environments +|Name| Description|Horizon|Action Dimension|Observation Dimension +|---|---|---|---|---| +|`SimpleReacher-v0`| Simple reaching task (2 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 2 | 9 +|`LongSimpleReacher-v0`| Simple reaching task (5 links) without any physics simulation. Provides no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions towards the end of the trajectory.| 200 | 5 | 18 +|`ViaPointReacher-v0`| Simple reaching task leveraging a via point, which supports self collision detection. Provides a reward only at 100 and 199 for reaching the viapoint and goal point, respectively.| 200 | 5 | 18 +|`HoleReacher-v0`| 5 link reaching task where the end-effector needs to reach into a narrow hole without collding with itself or walls | 200 | 5 | 18 + +## MP Environments +|Name| Description|Horizon|Action Dimension|Context Dimension +|---|---|---|---|---| +|`ViaPointReacherDMP-v0`| A DMP provides a trajectory for the `ViaPointReacher-v0` task. | 200 | 25 +|`HoleReacherFixedGoalDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task with a fixed goal attractor. | 200 | 25 +|`HoleReacherDMP-v0`| A DMP provides a trajectory for the `HoleReacher-v0` task. The goal attractor needs to be learned. | 200 | 30 +|`ALRBallInACupSimpleDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupSimple-v0` task where only 3 joints are actuated. | 4000 | 15 +|`ALRBallInACupDMP-v0`| A DMP provides a trajectory for the `ALRBallInACup-v0` task. | 4000 | 35 +|`ALRBallInACupGoalDMP-v0`| A DMP provides a trajectory for the `ALRBallInACupGoal-v0` task. | 4000 | 35 | 3 + +[//]: |`HoleReacherDetPMP-v0`| \ No newline at end of file diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/alr/classic_control/__init__.py new file mode 100644 index 0000000..73575ab --- /dev/null +++ b/alr_envs/alr/classic_control/__init__.py @@ -0,0 +1,3 @@ +from .hole_reacher.hole_reacher import HoleReacherEnv +from .simple_reacher.simple_reacher import SimpleReacherEnv +from .viapoint_reacher.viapoint_reacher import ViaPointReacherEnv diff --git a/alr_envs/alr/classic_control/hole_reacher/__init__.py b/alr_envs/alr/classic_control/hole_reacher/__init__.py new file mode 100644 index 0000000..c5e6d2f --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py similarity index 72% rename from alr_envs/classic_control/hole_reacher.py rename to alr_envs/alr/classic_control/hole_reacher/hole_reacher.py index ce6d9c3..ee1a997 100644 --- a/alr_envs/classic_control/hole_reacher.py +++ b/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py @@ -6,11 +6,10 @@ import numpy as np from gym.utils import seeding from matplotlib import patches -from alr_envs.classic_control.utils import check_self_collision -from alr_envs.utils.mps.mp_environments import AlrEnv +from alr_envs.alr.classic_control.utils import check_self_collision -class HoleReacherEnv(AlrEnv): +class HoleReacherEnv(gym.Env): def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None, hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False, @@ -22,14 +21,14 @@ class HoleReacherEnv(AlrEnv): self.random_start = random_start # provided initial parameters - self._hole_x = hole_x # x-position of center of hole - self._hole_width = hole_width # width of hole - self._hole_depth = hole_depth # depth of hole + self.initial_x = hole_x # x-position of center of hole + self.initial_width = hole_width # width of hole + self.initial_depth = hole_depth # depth of hole # temp container for current env state - self._tmp_hole_x = None - self._tmp_hole_width = None - self._tmp_hole_depth = None + self._tmp_x = None + self._tmp_width = None + self._tmp_depth = None self._goal = None # x-y coordinates for reaching the center at the bottom of the hole # collision @@ -44,7 +43,7 @@ class HoleReacherEnv(AlrEnv): self._start_pos = np.hstack([[np.pi / 2], np.zeros(self.n_links - 1)]) self._start_vel = np.zeros(self.n_links) - self.dt = 0.01 + self._dt = 0.01 action_bound = np.pi * np.ones((self.n_links,)) state_bound = np.hstack([ @@ -66,6 +65,22 @@ class HoleReacherEnv(AlrEnv): self._steps = 0 self.seed() + @property + def dt(self) -> Union[float, int]: + return self._dt + + # @property + # def start_pos(self): + # return self._start_pos + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + def step(self, action: np.ndarray): """ A single step with an action in joint velocity space @@ -88,7 +103,7 @@ class HoleReacherEnv(AlrEnv): def reset(self): if self.random_start: - # Maybe change more than dirst seed + # Maybe change more than first seed first_joint = self.np_random.uniform(np.pi / 4, 3 * np.pi / 4) self._joint_angles = np.hstack([[first_joint], np.zeros(self.n_links - 1)]) self._start_pos = self._joint_angles.copy() @@ -107,30 +122,27 @@ class HoleReacherEnv(AlrEnv): return self._get_obs().copy() def _generate_hole(self): - if self._hole_width is None: - width = self.np_random.uniform(0.15, 0.5, 1) + if self.initial_width is None: + width = self.np_random.uniform(0.15, 0.5) else: - width = np.copy(self._hole_width) - - if self._hole_x is None: + width = np.copy(self.initial_width) + if self.initial_x is None: # sample whole on left or right side - direction = np.random.choice([-1, 1]) + direction = self.np_random.choice([-1, 1]) # Hole center needs to be half the width away from the arm to give a valid setting. - x = direction * self.np_random.uniform(width / 2, 3.5, 1) + x = direction * self.np_random.uniform(width / 2, 3.5) else: - x = np.copy(self._hole_x) - - if self._hole_depth is None: + x = np.copy(self.initial_x) + if self.initial_depth is None: # TODO we do not want this right now. - depth = self.np_random.uniform(1, 1, 1) + depth = self.np_random.uniform(1, 1) else: - depth = np.copy(self._hole_depth) + depth = np.copy(self.initial_depth) - self._tmp_hole_width = width - self._tmp_hole_x = x - self._tmp_hole_depth = depth - - self._goal = np.hstack([self._tmp_hole_x, -self._tmp_hole_depth]) + self._tmp_width = width + self._tmp_x = x + self._tmp_depth = depth + self._goal = np.hstack([self._tmp_x, -self._tmp_depth]) def _update_joints(self): """ @@ -178,7 +190,7 @@ class HoleReacherEnv(AlrEnv): np.cos(theta), np.sin(theta), self._angle_velocity, - self._tmp_hole_width, + self._tmp_width, # self._tmp_hole_depth, self.end_effector - self._goal, self._steps @@ -204,9 +216,8 @@ class HoleReacherEnv(AlrEnv): return np.squeeze(end_effector + self._joints[0, :]) def _check_wall_collision(self, line_points): - # all points that are before the hole in x - r, c = np.where(line_points[:, :, 0] < (self._tmp_hole_x - self._tmp_hole_width / 2)) + r, c = np.where(line_points[:, :, 0] < (self._tmp_x - self._tmp_width / 2)) # check if any of those points are below surface nr_line_points_below_surface_before_hole = np.sum(line_points[r, c, 1] < 0) @@ -215,7 +226,7 @@ class HoleReacherEnv(AlrEnv): return True # all points that are after the hole in x - r, c = np.where(line_points[:, :, 0] > (self._tmp_hole_x + self._tmp_hole_width / 2)) + r, c = np.where(line_points[:, :, 0] > (self._tmp_x + self._tmp_width / 2)) # check if any of those points are below surface nr_line_points_below_surface_after_hole = np.sum(line_points[r, c, 1] < 0) @@ -224,11 +235,11 @@ class HoleReacherEnv(AlrEnv): return True # all points that are above the hole - r, c = np.where((line_points[:, :, 0] > (self._tmp_hole_x - self._tmp_hole_width / 2)) & ( - line_points[:, :, 0] < (self._tmp_hole_x + self._tmp_hole_width / 2))) + r, c = np.where((line_points[:, :, 0] > (self._tmp_x - self._tmp_width / 2)) & ( + line_points[:, :, 0] < (self._tmp_x + self._tmp_width / 2))) # check if any of those points are below surface - nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_hole_depth) + nr_line_points_below_surface_in_hole = np.sum(line_points[r, c, 1] < -self._tmp_depth) if nr_line_points_below_surface_in_hole > 0: return True @@ -252,7 +263,7 @@ class HoleReacherEnv(AlrEnv): self.fig.show() self.fig.gca().set_title( - f"Iteration: {self._steps}, distance: {self.end_effector - self._goal}") + f"Iteration: {self._steps}, distance: {np.linalg.norm(self.end_effector - self._goal) ** 2}") if mode == "human": @@ -271,17 +282,17 @@ class HoleReacherEnv(AlrEnv): def _set_patches(self): if self.fig is not None: self.fig.gca().patches = [] - left_block = patches.Rectangle((-self.n_links, -self._tmp_hole_depth), - self.n_links + self._tmp_hole_x - self._tmp_hole_width / 2, - self._tmp_hole_depth, + left_block = patches.Rectangle((-self.n_links, -self._tmp_depth), + self.n_links + self._tmp_x - self._tmp_width / 2, + self._tmp_depth, fill=True, edgecolor='k', facecolor='k') - right_block = patches.Rectangle((self._tmp_hole_x + self._tmp_hole_width / 2, -self._tmp_hole_depth), - self.n_links - self._tmp_hole_x + self._tmp_hole_width / 2, - self._tmp_hole_depth, + right_block = patches.Rectangle((self._tmp_x + self._tmp_width / 2, -self._tmp_depth), + self.n_links - self._tmp_x + self._tmp_width / 2, + self._tmp_depth, fill=True, edgecolor='k', facecolor='k') - hole_floor = patches.Rectangle((self._tmp_hole_x - self._tmp_hole_width / 2, -self._tmp_hole_depth), - self._tmp_hole_width, - 1 - self._tmp_hole_depth, + hole_floor = patches.Rectangle((self._tmp_x - self._tmp_width / 2, -self._tmp_depth), + self._tmp_width, + 1 - self._tmp_depth, fill=True, edgecolor='k', facecolor='k') # Add the patch to the Axes @@ -289,26 +300,6 @@ class HoleReacherEnv(AlrEnv): self.fig.gca().add_patch(right_block) self.fig.gca().add_patch(hole_floor) - @property - def active_obs(self): - return np.hstack([ - [self.random_start] * self.n_links, # cos - [self.random_start] * self.n_links, # sin - [self.random_start] * self.n_links, # velocity - [self._hole_width is None], # hole width - # [self._hole_depth is None], # hole depth - [True] * 2, # x-y coordinates of target distance - [False] # env steps - ]) - - @property - def start_pos(self) -> Union[float, int, np.ndarray]: - return self._start_pos - - @property - def goal_pos(self) -> Union[float, int, np.ndarray]: - raise ValueError("Goal position is not available and has to be learnt based on the environment.") - def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] @@ -318,25 +309,6 @@ class HoleReacherEnv(AlrEnv): return self._joints[self.n_links].T def close(self): + super().close() if self.fig is not None: plt.close(self.fig) - - -if __name__ == '__main__': - nl = 5 - render_mode = "human" # "human" or "partial" or "final" - env = HoleReacherEnv(n_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=None, - hole_depth=1, hole_x=None) - obs = env.reset() - - for i in range(2000): - # objective.load_result("/tmp/cma") - # test with random actions - ac = env.action_space.sample() - obs, rew, d, info = env.step(ac) - if i % 10 == 0: - env.render(mode=render_mode) - if d: - env.reset() - - env.close() diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py new file mode 100644 index 0000000..d951161 --- /dev/null +++ b/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py @@ -0,0 +1,35 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def active_obs(self): + return np.hstack([ + [self.env.random_start] * self.env.n_links, # cos + [self.env.random_start] * self.env.n_links, # sin + [self.env.random_start] * self.env.n_links, # velocity + [self.env.initial_width is None], # hole width + # [self.env.hole_depth is None], # hole depth + [True] * 2, # x-y coordinates of target distance + [False] # env steps + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_pos + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_vel + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/alr/classic_control/simple_reacher/__init__.py b/alr_envs/alr/classic_control/simple_reacher/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/alr/classic_control/simple_reacher/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py new file mode 100644 index 0000000..4b71e3a --- /dev/null +++ b/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py @@ -0,0 +1,33 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def active_obs(self): + return np.hstack([ + [self.env.random_start] * self.env.n_links, # cos + [self.env.random_start] * self.env.n_links, # sin + [self.env.random_start] * self.env.n_links, # velocity + [True] * 2, # x-y coordinates of target distance + [False] # env steps + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_pos + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_vel + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py similarity index 80% rename from alr_envs/classic_control/simple_reacher.py rename to alr_envs/alr/classic_control/simple_reacher/simple_reacher.py index 5b04aad..08c151f 100644 --- a/alr_envs/classic_control/simple_reacher.py +++ b/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py @@ -1,14 +1,13 @@ from typing import Iterable, Union +import gym import matplotlib.pyplot as plt import numpy as np from gym import spaces from gym.utils import seeding -from alr_envs.utils.mps.mp_environments import AlrEnv - -class SimpleReacherEnv(AlrEnv): +class SimpleReacherEnv(gym.Env): """ Simple Reaching Task without any physics simulation. Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions @@ -19,19 +18,22 @@ class SimpleReacherEnv(AlrEnv): super().__init__() self.link_lengths = np.ones(n_links) self.n_links = n_links - self.dt = 0.1 + self._dt = 0.1 self.random_start = random_start + # provided initial parameters + self.inital_target = target + + # temp container for current env state + self._goal = None + self._joints = None self._joint_angles = None self._angle_velocity = None self._start_pos = np.zeros(self.n_links) self._start_vel = np.zeros(self.n_links) - self._target = target # provided target value - self._goal = None # updated goal value, does not change when target != None - self.max_torque = 1 self.steps_before_reward = 199 @@ -53,6 +55,22 @@ class SimpleReacherEnv(AlrEnv): self._steps = 0 self.seed() + @property + def dt(self) -> Union[float, int]: + return self._dt + + # @property + # def start_pos(self): + # return self._start_pos + + @property + def current_pos(self): + return self._joint_angles + + @property + def current_vel(self): + return self._angle_velocity + def step(self, action: np.ndarray): """ A single step with action in torque space @@ -126,14 +144,14 @@ class SimpleReacherEnv(AlrEnv): def _generate_goal(self): - if self._target is None: + if self.inital_target is None: total_length = np.sum(self.link_lengths) goal = np.array([total_length, total_length]) while np.linalg.norm(goal) >= total_length: goal = self.np_random.uniform(low=-total_length, high=total_length, size=2) else: - goal = np.copy(self._target) + goal = np.copy(self.inital_target) self._goal = goal @@ -172,24 +190,6 @@ class SimpleReacherEnv(AlrEnv): self.fig.canvas.draw() self.fig.canvas.flush_events() - @property - def active_obs(self): - return np.hstack([ - [self.random_start] * self.n_links, # cos - [self.random_start] * self.n_links, # sin - [self.random_start] * self.n_links, # velocity - [True] * 2, # x-y coordinates of target distance - [False] # env steps - ]) - - @property - def start_pos(self): - return self._start_pos - - @property - def goal_pos(self): - raise ValueError("Goal position is not available and has to be learnt based on the environment.") - def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] @@ -200,26 +200,3 @@ class SimpleReacherEnv(AlrEnv): @property def end_effector(self): return self._joints[self.n_links].T - - -if __name__ == '__main__': - nl = 5 - render_mode = "human" # "human" or "partial" or "final" - env = SimpleReacherEnv(n_links=nl) - obs = env.reset() - print("First", obs) - - for i in range(2000): - # objective.load_result("/tmp/cma") - # test with random actions - ac = 2 * env.action_space.sample() - # ac = np.ones(env.action_space.shape) - obs, rew, d, info = env.step(ac) - env.render(mode=render_mode) - - print(obs[env.active_obs].shape) - - if d or i % 200 == 0: - env.reset() - - env.close() diff --git a/alr_envs/classic_control/utils.py b/alr_envs/alr/classic_control/utils.py similarity index 90% rename from alr_envs/classic_control/utils.py rename to alr_envs/alr/classic_control/utils.py index dbaa88e..fa8176a 100644 --- a/alr_envs/classic_control/utils.py +++ b/alr_envs/alr/classic_control/utils.py @@ -10,7 +10,7 @@ def intersect(A, B, C, D): def check_self_collision(line_points): - "Checks whether line segments and intersect" + """Checks whether line segments and intersect""" for i, line1 in enumerate(line_points): for line2 in line_points[i + 2:, :, :]: if intersect(line1[0], line1[-1], line2[0], line2[-1]): diff --git a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/alr/classic_control/viapoint_reacher/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py new file mode 100644 index 0000000..6b3e85d --- /dev/null +++ b/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py @@ -0,0 +1,34 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def active_obs(self): + return np.hstack([ + [self.env.random_start] * self.env.n_links, # cos + [self.env.random_start] * self.env.n_links, # sin + [self.env.random_start] * self.env.n_links, # velocity + [self.env.initial_via_target is None] * 2, # x-y coordinates of via point distance + [True] * 2, # x-y coordinates of target distance + [False] # env steps + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_pos + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.current_vel + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/classic_control/viapoint_reacher.py b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py similarity index 83% rename from alr_envs/classic_control/viapoint_reacher.py rename to alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py index 8edcc69..95d1e7f 100644 --- a/alr_envs/classic_control/viapoint_reacher.py +++ b/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py @@ -5,13 +5,12 @@ import matplotlib.pyplot as plt import numpy as np from gym.utils import seeding -from alr_envs.classic_control.utils import check_self_collision -from alr_envs.utils.mps.mp_environments import AlrEnv +from alr_envs.alr.classic_control.utils import check_self_collision -class ViaPointReacher(AlrEnv): +class ViaPointReacherEnv(gym.Env): - def __init__(self, n_links, random_start: bool = True, via_target: Union[None, Iterable] = None, + def __init__(self, n_links, random_start: bool = False, via_target: Union[None, Iterable] = None, target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000): self.n_links = n_links @@ -20,8 +19,8 @@ class ViaPointReacher(AlrEnv): self.random_start = random_start # provided initial parameters - self._target = target # provided target value - self._via_target = via_target # provided via point target value + self.intitial_target = target # provided target value + self.initial_via_target = via_target # provided via point target value # temp container for current env state self._via_point = np.ones(2) @@ -39,7 +38,7 @@ class ViaPointReacher(AlrEnv): self._start_vel = np.zeros(self.n_links) self.weight_matrix_scale = 1 - self.dt = 0.01 + self._dt = 0.01 action_bound = np.pi * np.ones((self.n_links,)) state_bound = np.hstack([ @@ -60,6 +59,22 @@ class ViaPointReacher(AlrEnv): self._steps = 0 self.seed() + @property + def dt(self): + return self._dt + + # @property + # def start_pos(self): + # return self._start_pos + + @property + def current_pos(self): + return self._joint_angles.copy() + + @property + def current_vel(self): + return self._angle_velocity.copy() + def step(self, action: np.ndarray): """ a single step with an action in joint velocity space @@ -104,22 +119,22 @@ class ViaPointReacher(AlrEnv): total_length = np.sum(self.link_lengths) # rejection sampled point in inner circle with 0.5*Radius - if self._via_target is None: + if self.initial_via_target is None: via_target = np.array([total_length, total_length]) while np.linalg.norm(via_target) >= 0.5 * total_length: via_target = self.np_random.uniform(low=-0.5 * total_length, high=0.5 * total_length, size=2) else: - via_target = np.copy(self._via_target) + via_target = np.copy(self.initial_via_target) # rejection sampled point in outer circle - if self._target is None: + if self.intitial_target is None: goal = np.array([total_length, total_length]) while np.linalg.norm(goal) >= total_length or np.linalg.norm(goal) <= 0.5 * total_length: goal = self.np_random.uniform(low=-total_length, high=total_length, size=2) else: - goal = np.copy(self._target) + goal = np.copy(self.intitial_target) - self._via_target = via_target + self._via_point = via_target self._goal = goal def _update_joints(self): @@ -266,25 +281,6 @@ class ViaPointReacher(AlrEnv): plt.pause(0.01) - @property - def active_obs(self): - return np.hstack([ - [self.random_start] * self.n_links, # cos - [self.random_start] * self.n_links, # sin - [self.random_start] * self.n_links, # velocity - [self._via_target is None] * 2, # x-y coordinates of via point distance - [True] * 2, # x-y coordinates of target distance - [False] # env steps - ]) - - @property - def start_pos(self) -> Union[float, int, np.ndarray]: - return self._start_pos - - @property - def goal_pos(self) -> Union[float, int, np.ndarray]: - raise ValueError("Goal position is not available and has to be learnt based on the environment.") - def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] @@ -296,26 +292,3 @@ class ViaPointReacher(AlrEnv): def close(self): if self.fig is not None: plt.close(self.fig) - - -if __name__ == '__main__': - nl = 5 - render_mode = "human" # "human" or "partial" or "final" - env = ViaPointReacher(n_links=nl, allow_self_collision=False) - env.reset() - env.render(mode=render_mode) - - for i in range(300): - # objective.load_result("/tmp/cma") - # test with random actions - ac = env.action_space.sample() - # ac[0] += np.pi/2 - obs, rew, d, info = env.step(ac) - env.render(mode=render_mode) - - print(rew) - - if d: - break - - env.close() diff --git a/alr_envs/alr/mujoco/README.MD b/alr_envs/alr/mujoco/README.MD new file mode 100644 index 0000000..0ea5a1f --- /dev/null +++ b/alr_envs/alr/mujoco/README.MD @@ -0,0 +1,15 @@ +# Custom Mujoco tasks + +## Step-based Environments +|Name| Description|Horizon|Action Dimension|Observation Dimension +|---|---|---|---|---| +|`ALRReacher-v0`|Modified (5 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 5 | 21 +|`ALRReacherSparse-v0`|Same as `ALRReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 5 | 21 +|`ALRReacherSparseBalanced-v0`|Same as `ALRReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 5 | 21 +|`ALRLongReacher-v0`|Modified (7 links) Mujoco gym's `Reacher-v2` (2 links)| 200 | 7 | 27 +|`ALRLongReacherSparse-v0`|Same as `ALRLongReacher-v0`, but the distance penalty is only provided in the last time step.| 200 | 7 | 27 +|`ALRLongReacherSparseBalanced-v0`|Same as `ALRLongReacherSparse-v0`, but the end-effector has to remain upright.| 200 | 7 | 27 +|`ALRBallInACupSimple-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector. | 4000 | 3 | wip +|`ALRBallInACup-v0`| Ball-in-a-cup task where a robot needs to catch a ball attached to a cup at its end-effector | 4000 | 7 | wip +|`ALRBallInACupGoal-v0`| Similar to `ALRBallInACupSimple-v0` but the ball needs to be caught at a specified goal position | 4000 | 7 | wip + \ No newline at end of file diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py new file mode 100644 index 0000000..6744d15 --- /dev/null +++ b/alr_envs/alr/mujoco/__init__.py @@ -0,0 +1,4 @@ +from .reacher.alr_reacher import ALRReacherEnv +from .reacher.balancing import BalancingEnv +from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv +from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv \ No newline at end of file diff --git a/alr_envs/mujoco/alr_reward_fct.py b/alr_envs/alr/mujoco/alr_reward_fct.py similarity index 100% rename from alr_envs/mujoco/alr_reward_fct.py rename to alr_envs/alr/mujoco/alr_reward_fct.py diff --git a/alr_envs/mujoco/ball_in_a_cup/__init__.py b/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py similarity index 100% rename from alr_envs/mujoco/ball_in_a_cup/__init__.py rename to alr_envs/alr/mujoco/ball_in_a_cup/__init__.py diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/biac_base.xml b/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml similarity index 100% rename from alr_envs/mujoco/ball_in_a_cup/assets/biac_base.xml rename to alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py similarity index 77% rename from alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py rename to alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py index 345b3ce..7cff670 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py +++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py @@ -1,30 +1,28 @@ from gym import utils import os import numpy as np -from alr_envs.mujoco import alr_mujoco_env +from gym.envs.mujoco import MujocoEnv -class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): + +class ALRBallInACupEnv(MujocoEnv, utils.EzPickle): def __init__(self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False, reward_type: str = None, context: np.ndarray = None): + utils.EzPickle.__init__(**locals()) self._steps = 0 - self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", - "biac_base" + ".xml") + self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml") self._q_pos = [] self._q_vel = [] # self.weight_matrix_scale = 50 self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) - self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5]) - self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]) self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) self.context = context - utils.EzPickle.__init__(self) alr_mujoco_env.AlrMujocoEnv.__init__(self, self.xml_path, apply_gravity_comp=apply_gravity_comp, @@ -37,13 +35,13 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): self.sim_time = 8 # seconds self.sim_steps = int(self.sim_time / self.dt) if reward_type == "no_context": - from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward + from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward reward_function = BallInACupReward elif reward_type == "contextual_goal": - from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward + from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward reward_function = BallInACupReward else: - raise ValueError("Unknown reward type") + raise ValueError("Unknown reward type: {}".format(reward_type)) self.reward_function = reward_function(self.sim_steps) @property @@ -68,6 +66,10 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): def current_vel(self): return self.sim.data.qvel[0:7].copy() + def reset(self): + self.reward_function.reset(None) + return super().reset() + def reset_model(self): init_pos_all = self.init_qpos.copy() init_pos_robot = self._start_pos @@ -102,14 +104,18 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): done = success or self._steps == self.sim_steps - 1 or is_collided self._steps += 1 else: - reward = -2 + reward = -2000 success = False is_collided = False done = True return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, velocity=angular_vel, - traj=self._q_pos, is_success=success, + # traj=self._q_pos, + action=a, + q_pos=self.sim.data.qpos[0:7].ravel().copy(), + q_vel=self.sim.data.qvel[0:7].ravel().copy(), + is_success=success, is_collided=is_collided, sim_crash=crash) def check_traj_in_joint_limits(self): @@ -150,6 +156,22 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): des_vel_full[5] = des_vel[2] return des_vel_full + def render(self, render_mode, **render_kwargs): + if render_mode == "plot_trajectory": + if self._steps == 1: + import matplotlib.pyplot as plt + # plt.ion() + self.fig, self.axs = plt.subplots(3, 1) + + if self._steps <= 1750: + for ax, cp in zip(self.axs, self.current_pos[1::2]): + ax.scatter(self._steps, cp, s=2, marker=".") + + # self.fig.show() + + else: + super().render(render_mode, **render_kwargs) + if __name__ == "__main__": env = ALRBallInACupEnv() @@ -172,4 +194,3 @@ if __name__ == "__main__": break env.close() - diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py new file mode 100644 index 0000000..945fa8d --- /dev/null +++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py @@ -0,0 +1,42 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class BallInACupMPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # TODO: @Max Filter observations correctly + return np.hstack([ + [False] * 7, # cos + [False] * 7, # sin + # [True] * 2, # x-y coordinates of target distance + [False] # env steps + ]) + + @property + def start_pos(self): + if self.simplified: + return self._start_pos[1::2] + else: + return self._start_pos + + @property + def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qpos[0:7].copy() + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.sim.data.qvel[0:7].copy() + + @property + def goal_pos(self): + # TODO: @Max I think the default value of returning to the start is reasonable here + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py similarity index 99% rename from alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py rename to alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py index e34aecf..4ea4381 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py +++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py @@ -1,5 +1,5 @@ import numpy as np -from alr_envs.mujoco import alr_reward_fct +from alr_envs.alr.mujoco import alr_reward_fct class BallInACupReward(alr_reward_fct.AlrReward): diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py similarity index 62% rename from alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py rename to alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py index 79987d6..a147d89 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py +++ b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py @@ -1,11 +1,10 @@ import numpy as np -from alr_envs.mujoco import alr_reward_fct +from alr_envs.alr.mujoco import alr_reward_fct class BallInACupReward(alr_reward_fct.AlrReward): - def __init__(self, sim_time): - self.sim_time = sim_time - + def __init__(self, env): + self.env = env self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below", "wrist_palm_link_convex_geom", "wrist_pitch_link_convex_decomposition_p1_geom", @@ -22,7 +21,7 @@ class BallInACupReward(alr_reward_fct.AlrReward): self.goal_final_id = None self.collision_ids = None self._is_collided = False - self.collision_penalty = 1 + self.collision_penalty = 1000 self.ball_traj = None self.dists = None @@ -32,55 +31,62 @@ class BallInACupReward(alr_reward_fct.AlrReward): self.reset(None) def reset(self, context): - self.ball_traj = np.zeros(shape=(self.sim_time, 3)) + # self.sim_time = self.env.sim.dtsim_time + self.ball_traj = [] # np.zeros(shape=(self.sim_time, 3)) self.dists = [] self.dists_final = [] self.costs = [] self.action_costs = [] + self.angle_costs = [] self.cup_angles = [] - def compute_reward(self, action, env): - self.ball_id = env.sim.model._body_name2id["ball"] - self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"] - self.goal_id = env.sim.model._site_name2id["cup_goal"] - self.goal_final_id = env.sim.model._site_name2id["cup_goal_final"] - self.collision_ids = [env.sim.model._geom_name2id[name] for name in self.collision_objects] + def compute_reward(self, action): + self.ball_id = self.env.sim.model._body_name2id["ball"] + self.ball_collision_id = self.env.sim.model._geom_name2id["ball_geom"] + self.goal_id = self.env.sim.model._site_name2id["cup_goal"] + self.goal_final_id = self.env.sim.model._site_name2id["cup_goal_final"] + self.collision_ids = [self.env.sim.model._geom_name2id[name] for name in self.collision_objects] - ball_in_cup = self.check_ball_in_cup(env.sim, self.ball_collision_id) + ball_in_cup = self.check_ball_in_cup(self.env.sim, self.ball_collision_id) # Compute the current distance from the ball to the inner part of the cup - goal_pos = env.sim.data.site_xpos[self.goal_id] - ball_pos = env.sim.data.body_xpos[self.ball_id] - goal_final_pos = env.sim.data.site_xpos[self.goal_final_id] + goal_pos = self.env.sim.data.site_xpos[self.goal_id] + ball_pos = self.env.sim.data.body_xpos[self.ball_id] + goal_final_pos = self.env.sim.data.site_xpos[self.goal_final_id] self.dists.append(np.linalg.norm(goal_pos - ball_pos)) self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) - self.ball_traj[env._steps, :] = ball_pos - cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]]) - self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]), - 1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2))) + # self.ball_traj[self.env._steps, :] = ball_pos + self.ball_traj.append(ball_pos) + cup_quat = np.copy(self.env.sim.data.body_xquat[self.env.sim.model._body_name2id["cup"]]) + cup_angle = np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]), + 1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)) + cost_angle = (cup_angle - np.pi / 2) ** 2 + self.angle_costs.append(cost_angle) + self.cup_angles.append(cup_angle) action_cost = np.sum(np.square(action)) self.action_costs.append(action_cost) - self._is_collided = self.check_collision(env.sim) or env.check_traj_in_joint_limits() + self._is_collided = self.check_collision(self.env.sim) or self.env.check_traj_in_joint_limits() - if env._steps == env.sim_steps - 1 or self._is_collided: + if self.env._steps == self.env.ep_length - 1 or self._is_collided: t_min_dist = np.argmin(self.dists) angle_min_dist = self.cup_angles[t_min_dist] - cost_angle = (angle_min_dist - np.pi / 2)**2 + # cost_angle = (angle_min_dist - np.pi / 2)**2 - min_dist = self.dists[t_min_dist] + + # min_dist = self.dists[t_min_dist] dist_final = self.dists_final[-1] min_dist_final = np.min(self.dists_final) - cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist + + # cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist + # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided) # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided) - reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided) + reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided) success = dist_final < 0.05 and ball_in_cup and not self._is_collided crash = self._is_collided else: - reward = - 1e-5 * action_cost # TODO: increase action_cost weight + reward = - 1e-3 * action_cost - 1e-4 * cost_angle # TODO: increase action_cost weight success = False crash = False diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py b/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py new file mode 100644 index 0000000..b047c54 --- /dev/null +++ b/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py @@ -0,0 +1,205 @@ +import os + +import gym.envs.mujoco +import gym.envs.mujoco as mujoco_env +import mujoco_py.builder +import numpy as np +from gym import utils + +from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper +from mp_env_api.utils.policies import PDControllerExtend + + +def make_detpmp_env(**kwargs): + name = kwargs.pop("name") + _env = gym.make(name) + policy = PDControllerExtend(_env, p_gains=kwargs.pop('p_gains'), d_gains=kwargs.pop('d_gains')) + kwargs['policy_type'] = policy + return DetPMPWrapper(_env, **kwargs) + + +class ALRBallInACupPDEnv(mujoco_env.MujocoEnv, utils.EzPickle): + def __init__(self, frame_skip=4, apply_gravity_comp=True, simplified: bool = False, + reward_type: str = None, context: np.ndarray = None): + utils.EzPickle.__init__(**locals()) + self._steps = 0 + + self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml") + + self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) + + self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) + self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) + + self.context = context + self.apply_gravity_comp = apply_gravity_comp + self.simplified = simplified + + self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) + self._start_vel = np.zeros(7) + + self.sim_time = 8 # seconds + self._dt = 0.02 + self.ep_length = 4000 # based on 8 seconds with dt = 0.02 int(self.sim_time / self.dt) + if reward_type == "no_context": + from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward + reward_function = BallInACupReward + elif reward_type == "contextual_goal": + from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward + reward_function = BallInACupReward + else: + raise ValueError("Unknown reward type: {}".format(reward_type)) + self.reward_function = reward_function(self) + + mujoco_env.MujocoEnv.__init__(self, self.xml_path, frame_skip) + + @property + def dt(self): + return self._dt + + # TODO: @Max is this even needed? + @property + def start_vel(self): + if self.simplified: + return self._start_vel[1::2] + else: + return self._start_vel + + # def _set_action_space(self): + # if self.simplified: + # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)[1::2] + # else: + # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32) + # low, high = bounds.T + # self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) + # return self.action_space + + def reset(self): + self.reward_function.reset(None) + return super().reset() + + def reset_model(self): + init_pos_all = self.init_qpos.copy() + init_pos_robot = self._start_pos + init_vel = np.zeros_like(init_pos_all) + + self._steps = 0 + self._q_pos = [] + self._q_vel = [] + + start_pos = init_pos_all + start_pos[0:7] = init_pos_robot + + self.set_state(start_pos, init_vel) + + return self._get_obs() + + def step(self, a): + reward_dist = 0.0 + angular_vel = 0.0 + reward_ctrl = - np.square(a).sum() + + # if self.simplified: + # tmp = np.zeros(7) + # tmp[1::2] = a + # a = tmp + + if self.apply_gravity_comp: + a += self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0] + + crash = False + try: + self.do_simulation(a, self.frame_skip) + except mujoco_py.builder.MujocoException: + crash = True + # joint_cons_viol = self.check_traj_in_joint_limits() + + ob = self._get_obs() + + if not crash: + reward, success, is_collided = self.reward_function.compute_reward(a) + done = success or is_collided # self._steps == self.sim_steps - 1 + self._steps += 1 + else: + reward = -2000 + success = False + is_collided = False + done = True + + return ob, reward, done, dict(reward_dist=reward_dist, + reward_ctrl=reward_ctrl, + velocity=angular_vel, + # traj=self._q_pos, + action=a, + q_pos=self.sim.data.qpos[0:7].ravel().copy(), + q_vel=self.sim.data.qvel[0:7].ravel().copy(), + is_success=success, + is_collided=is_collided, sim_crash=crash) + + def check_traj_in_joint_limits(self): + return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) + + # TODO: extend observation space + def _get_obs(self): + theta = self.sim.data.qpos.flat[:7] + return np.concatenate([ + np.cos(theta), + np.sin(theta), + # self.get_body_com("target"), # only return target to make problem harder + [self._steps], + ]) + + # These functions are for the task with 3 joint actuations + def extend_des_pos(self, des_pos): + des_pos_full = self._start_pos.copy() + des_pos_full[1] = des_pos[0] + des_pos_full[3] = des_pos[1] + des_pos_full[5] = des_pos[2] + return des_pos_full + + def extend_des_vel(self, des_vel): + des_vel_full = self._start_vel.copy() + des_vel_full[1] = des_vel[0] + des_vel_full[3] = des_vel[1] + des_vel_full[5] = des_vel[2] + return des_vel_full + + def render(self, render_mode, **render_kwargs): + if render_mode == "plot_trajectory": + if self._steps == 1: + import matplotlib.pyplot as plt + # plt.ion() + self.fig, self.axs = plt.subplots(3, 1) + + if self._steps <= 1750: + for ax, cp in zip(self.axs, self.current_pos[1::2]): + ax.scatter(self._steps, cp, s=2, marker=".") + + # self.fig.show() + + else: + super().render(render_mode, **render_kwargs) + + +if __name__ == "__main__": + env = ALRBallInACupPDEnv(reward_type="no_context", simplified=True) + # env = gym.make("alr_envs:ALRBallInACupPDSimpleDetPMP-v0") + # ctxt = np.array([-0.20869846, -0.66376693, 1.18088501]) + + # env.configure(ctxt) + env.reset() + env.render("human") + for i in range(16000): + # test with random actions + ac = 0.02 * env.action_space.sample()[0:7] + # ac = env.start_pos + # ac[0] += np.pi/2 + obs, rew, d, info = env.step(ac) + env.render("human") + + print(rew) + + if d: + break + + env.close() diff --git a/alr_envs/mujoco/ball_in_a_cup/utils.py b/alr_envs/alr/mujoco/ball_in_a_cup/utils.py similarity index 93% rename from alr_envs/mujoco/ball_in_a_cup/utils.py rename to alr_envs/alr/mujoco/ball_in_a_cup/utils.py index 714566a..0dd972c 100644 --- a/alr_envs/mujoco/ball_in_a_cup/utils.py +++ b/alr_envs/alr/mujoco/ball_in_a_cup/utils.py @@ -1,6 +1,6 @@ -from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper -from alr_envs.utils.mps.dmp_wrapper import DmpWrapper -from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv +from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv +from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper +from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper def make_contextual_env(rank, seed=0): @@ -26,7 +26,7 @@ def make_contextual_env(rank, seed=0): return _init -def make_env(rank, seed=0): +def _make_env(rank, seed=0): """ Utility function for multiprocessed env. diff --git a/alr_envs/mujoco/beerpong/__init__.py b/alr_envs/alr/mujoco/beerpong/__init__.py similarity index 100% rename from alr_envs/mujoco/beerpong/__init__.py rename to alr_envs/alr/mujoco/beerpong/__init__.py diff --git a/alr_envs/mujoco/beerpong/assets/beerpong.xml b/alr_envs/alr/mujoco/beerpong/assets/beerpong.xml similarity index 100% rename from alr_envs/mujoco/beerpong/assets/beerpong.xml rename to alr_envs/alr/mujoco/beerpong/assets/beerpong.xml diff --git a/alr_envs/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py similarity index 86% rename from alr_envs/mujoco/beerpong/beerpong.py rename to alr_envs/alr/mujoco/beerpong/beerpong.py index 5efc431..4d8f389 100644 --- a/alr_envs/mujoco/beerpong/beerpong.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong.py @@ -1,11 +1,15 @@ -from gym import utils import os + import numpy as np -from alr_envs.mujoco import alr_mujoco_env +from gym import utils +from gym.envs.mujoco import MujocoEnv -class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): - def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): + +class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): + def __init__(self, model_path, frame_skip, n_substeps=4, apply_gravity_comp=True, reward_function=None): + utils.EzPickle.__init__(**locals()) + MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip) self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", @@ -25,17 +29,15 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) self.context = None - - utils.EzPickle.__init__(self) - alr_mujoco_env.AlrMujocoEnv.__init__(self, - self.xml_path, - apply_gravity_comp=apply_gravity_comp, - n_substeps=n_substeps) + # alr_mujoco_env.AlrMujocoEnv.__init__(self, + # self.xml_path, + # apply_gravity_comp=apply_gravity_comp, + # n_substeps=n_substeps) self.sim_time = 8 # seconds self.sim_steps = int(self.sim_time / self.dt) if reward_function is None: - from alr_envs.mujoco.beerpong.beerpong_reward import BeerpongReward + from alr_envs.alr.mujoco.beerpong.beerpong_reward import BeerpongReward reward_function = BeerpongReward self.reward_function = reward_function(self.sim, self.sim_steps) self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] diff --git a/alr_envs/mujoco/beerpong/beerpong_reward.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py similarity index 99% rename from alr_envs/mujoco/beerpong/beerpong_reward.py rename to alr_envs/alr/mujoco/beerpong/beerpong_reward.py index 3281409..c3ca3c6 100644 --- a/alr_envs/mujoco/beerpong/beerpong_reward.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward.py @@ -1,5 +1,5 @@ import numpy as np -from alr_envs.mujoco import alr_reward_fct +from alr_envs.alr.mujoco import alr_reward_fct class BeerpongReward(alr_reward_fct.AlrReward): diff --git a/alr_envs/mujoco/beerpong/beerpong_reward_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py similarity index 99% rename from alr_envs/mujoco/beerpong/beerpong_reward_simple.py rename to alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py index b79e963..fbe2163 100644 --- a/alr_envs/mujoco/beerpong/beerpong_reward_simple.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_simple.py @@ -1,5 +1,5 @@ import numpy as np -from alr_envs.mujoco import alr_reward_fct +from alr_envs.alr.mujoco import alr_reward_fct class BeerpongReward(alr_reward_fct.AlrReward): diff --git a/alr_envs/mujoco/beerpong/beerpong_simple.py b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py similarity index 90% rename from alr_envs/mujoco/beerpong/beerpong_simple.py rename to alr_envs/alr/mujoco/beerpong/beerpong_simple.py index 7a0908d..73da83d 100644 --- a/alr_envs/mujoco/beerpong/beerpong_simple.py +++ b/alr_envs/alr/mujoco/beerpong/beerpong_simple.py @@ -1,11 +1,13 @@ from gym import utils import os import numpy as np -from alr_envs.mujoco import alr_mujoco_env +from gym.envs.mujoco import MujocoEnv -class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): +class ALRBeerpongEnv(MujocoEnv, utils.EzPickle): def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): + utils.EzPickle.__init__(**locals()) + self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", @@ -26,16 +28,17 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): self.context = None - utils.EzPickle.__init__(self) - alr_mujoco_env.AlrMujocoEnv.__init__(self, - self.xml_path, - apply_gravity_comp=apply_gravity_comp, - n_substeps=n_substeps) + MujocoEnv.__init__(self, model_path=self.xml_path, frame_skip=n_substeps) + + # alr_mujoco_env.AlrMujocoEnv.__init__(self, + # self.xml_path, + # apply_gravity_comp=apply_gravity_comp, + # n_substeps=n_substeps) self.sim_time = 8 # seconds self.sim_steps = int(self.sim_time / self.dt) if reward_function is None: - from alr_envs.mujoco.beerpong.beerpong_reward_simple import BeerpongReward + from alr_envs.alr.mujoco.beerpong.beerpong_reward_simple import BeerpongReward reward_function = BeerpongReward self.reward_function = reward_function(self.sim, self.sim_steps) self.cup_robot_id = self.sim.model._site_name2id["cup_robot_final"] diff --git a/alr_envs/mujoco/beerpong/utils.py b/alr_envs/alr/mujoco/beerpong/utils.py similarity index 91% rename from alr_envs/mujoco/beerpong/utils.py rename to alr_envs/alr/mujoco/beerpong/utils.py index 37d2ad1..f43e881 100644 --- a/alr_envs/mujoco/beerpong/utils.py +++ b/alr_envs/alr/mujoco/beerpong/utils.py @@ -1,6 +1,6 @@ from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper -from alr_envs.mujoco.beerpong.beerpong import ALRBeerpongEnv -from alr_envs.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple +from alr_envs.alr.mujoco.beerpong.beerpong import ALRBeerpongEnv +from alr_envs.alr.mujoco.beerpong.beerpong_simple import ALRBeerpongEnv as ALRBeerpongEnvSimple def make_contextual_env(rank, seed=0): @@ -26,7 +26,7 @@ def make_contextual_env(rank, seed=0): return _init -def make_env(rank, seed=0): +def _make_env(rank, seed=0): """ Utility function for multiprocessed env. diff --git a/alr_envs/mujoco/gym_table_tennis/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/__init__.py similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/__init__.py rename to alr_envs/alr/mujoco/gym_table_tennis/__init__.py diff --git a/alr_envs/mujoco/gym_table_tennis/envs/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/__init__.py rename to alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/include_table.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/include_table.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/shared.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/shared.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml rename to alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml diff --git a/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py similarity index 98% rename from alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py rename to alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py index f155f06..9b16ae1 100644 --- a/alr_envs/mujoco/gym_table_tennis/envs/table_tennis_env.py +++ b/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py @@ -2,9 +2,9 @@ import numpy as np from gym import spaces from gym.envs.robotics import robot_env, utils # import xml.etree.ElementTree as ET -from alr_envs.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis +from alr_envs.alr.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis import glfw -from alr_envs.mujoco.gym_table_tennis.utils.experiment import ball_initialize +from alr_envs.alr.mujoco.gym_table_tennis.utils.experiment import ball_initialize from pathlib import Path import os diff --git a/alr_envs/mujoco/gym_table_tennis/utils/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/utils/__init__.py rename to alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py diff --git a/alr_envs/mujoco/gym_table_tennis/utils/experiment.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py similarity index 96% rename from alr_envs/mujoco/gym_table_tennis/utils/experiment.py rename to alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py index addd6c5..a106d68 100644 --- a/alr_envs/mujoco/gym_table_tennis/utils/experiment.py +++ b/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py @@ -1,6 +1,6 @@ import numpy as np from gym.utils import seeding -from alr_envs.mujoco.gym_table_tennis.utils.util import read_yaml, read_json +from alr_envs.alr.mujoco.gym_table_tennis.utils.util import read_yaml, read_json from pathlib import Path diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/utils/rewards/__init__.py rename to alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py rename to alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py diff --git a/alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/utils/rewards/rewards.py rename to alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py diff --git a/alr_envs/mujoco/gym_table_tennis/utils/util.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py similarity index 92% rename from alr_envs/mujoco/gym_table_tennis/utils/util.py rename to alr_envs/alr/mujoco/gym_table_tennis/utils/util.py index 716b3c6..fa308e3 100644 --- a/alr_envs/mujoco/gym_table_tennis/utils/util.py +++ b/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py @@ -39,7 +39,7 @@ def config_save(dir_path, config): def change_kp_in_xml(kp_list, - model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/envs/robotics/assets/table_tennis/right_arm_actuator.xml"): + model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/simple_reacher/robotics/assets/table_tennis/right_arm_actuator.xml"): tree = ET.parse(model_path) root = tree.getroot() # for actuator in root.find("actuator"): diff --git a/alr_envs/mujoco/meshes/wam/base_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/base_link_convex.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/base_link_convex.stl rename to alr_envs/alr/mujoco/meshes/wam/base_link_convex.stl diff --git a/alr_envs/mujoco/meshes/wam/base_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/base_link_fine.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/base_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/base_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/cup.stl b/alr_envs/alr/mujoco/meshes/wam/cup.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup.stl rename to alr_envs/alr/mujoco/meshes/wam/cup.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split1.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split1.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split1.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split1.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split10.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split10.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split10.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split10.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split11.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split11.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split11.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split11.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split12.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split12.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split12.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split12.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split13.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split13.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split13.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split13.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split14.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split14.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split14.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split14.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split15.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split15.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split15.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split15.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split16.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split16.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split16.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split16.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split17.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split17.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split17.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split17.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split18.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split18.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split18.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split18.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split2.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split2.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split2.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split2.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split3.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split3.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split3.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split3.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split4.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split4.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split4.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split4.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split5.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split5.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split5.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split5.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split6.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split6.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split6.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split6.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split7.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split7.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split7.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split7.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split8.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split8.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split8.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split8.stl diff --git a/alr_envs/mujoco/meshes/wam/cup_split9.stl b/alr_envs/alr/mujoco/meshes/wam/cup_split9.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/cup_split9.stl rename to alr_envs/alr/mujoco/meshes/wam/cup_split9.stl diff --git a/alr_envs/mujoco/meshes/wam/elbow_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/elbow_link_convex.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/elbow_link_convex.stl rename to alr_envs/alr/mujoco/meshes/wam/elbow_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/elbow_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/elbow_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/meshes/wam/forearm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/meshes/wam/forearm_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/forearm_link_fine.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/forearm_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/forearm_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl rename to alr_envs/alr/mujoco/meshes/wam/shoulder_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/meshes/wam/shoulder_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_link_fine.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/shoulder_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/shoulder_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/shoulder_pitch_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_convex.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/shoulder_pitch_link_convex.stl rename to alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_convex.stl diff --git a/alr_envs/mujoco/meshes/wam/shoulder_pitch_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_fine.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/shoulder_pitch_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/shoulder_pitch_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/meshes/wam/upper_arm_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/meshes/wam/upper_arm_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/upper_arm_link_fine.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/upper_arm_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/upper_arm_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/wrist_palm_link_convex.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_convex.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/wrist_palm_link_convex.stl rename to alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_convex.stl diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_fine.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_fine.stl old mode 100644 new mode 100755 similarity index 100% rename from alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_fine.stl rename to alr_envs/alr/mujoco/meshes/wam/wrist_palm_link_fine.stl diff --git a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl rename to alr_envs/alr/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl diff --git a/alr_envs/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl rename to alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl diff --git a/alr_envs/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl rename to alr_envs/alr/mujoco/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl diff --git a/alr_envs/mujoco/reacher/__init__.py b/alr_envs/alr/mujoco/reacher/__init__.py similarity index 100% rename from alr_envs/mujoco/reacher/__init__.py rename to alr_envs/alr/mujoco/reacher/__init__.py diff --git a/alr_envs/mujoco/reacher/alr_reacher.py b/alr_envs/alr/mujoco/reacher/alr_reacher.py similarity index 100% rename from alr_envs/mujoco/reacher/alr_reacher.py rename to alr_envs/alr/mujoco/reacher/alr_reacher.py diff --git a/alr_envs/mujoco/reacher/assets/reacher_5links.xml b/alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml similarity index 100% rename from alr_envs/mujoco/reacher/assets/reacher_5links.xml rename to alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml diff --git a/alr_envs/mujoco/reacher/assets/reacher_7links.xml b/alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml similarity index 100% rename from alr_envs/mujoco/reacher/assets/reacher_7links.xml rename to alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml diff --git a/alr_envs/mujoco/balancing.py b/alr_envs/alr/mujoco/reacher/balancing.py similarity index 100% rename from alr_envs/mujoco/balancing.py rename to alr_envs/alr/mujoco/reacher/balancing.py diff --git a/alr_envs/classic_control/__init__.py b/alr_envs/classic_control/__init__.py deleted file mode 100644 index 4a26eaa..0000000 --- a/alr_envs/classic_control/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from alr_envs.classic_control.simple_reacher import SimpleReacherEnv -from alr_envs.classic_control.viapoint_reacher import ViaPointReacher -from alr_envs.classic_control.hole_reacher import HoleReacherEnv diff --git a/alr_envs/dmc/README.MD b/alr_envs/dmc/README.MD new file mode 100644 index 0000000..791ee84 --- /dev/null +++ b/alr_envs/dmc/README.MD @@ -0,0 +1,19 @@ +# DeepMind Control (DMC) Wrappers + +These are the Environment Wrappers for selected +[DeepMind Control](https://deepmind.com/research/publications/2020/dm-control-Software-and-Tasks-for-Continuous-Control) +environments in order to use our Motion Primitive gym interface with them. + +## MP Environments + +[//]: <> (These environments are wrapped-versions of their Deep Mind Control Suite (DMC) counterparts. Given most task can be) +[//]: <> (solved in shorter horizon lengths than the original 1000 steps, we often shorten the episodes for those task.) + +|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension +|---|---|---|---|---| +|`dmc_ball_in_cup-catch_detpmp-v0`| A DetPmP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000 | 10 | 2 +|`dmc_ball_in_cup-catch_dmp-v0`| A DMP wrapped version of the "catch" task for the "ball_in_cup" environment. | 1000| 10 | 2 +|`dmc_reacher-easy_detpmp-v0`| A DetPmP wrapped version of the "easy" task for the "reacher" environment. | 1000 | 10 | 4 +|`dmc_reacher-easy_dmp-v0`| A DMP wrapped version of the "easy" task for the "reacher" environment. | 1000| 10 | 4 +|`dmc_reacher-hard_detpmp-v0`| A DetPmP wrapped version of the "hard" task for the "reacher" environment.| 1000 | 10 | 4 +|`dmc_reacher-hard_dmp-v0`| A DMP wrapped version of the "hard" task for the "reacher" environment. | 1000 | 10 | 4 diff --git a/alr_envs/dmc/__init__.py b/alr_envs/dmc/__init__.py new file mode 100644 index 0000000..17d1f7f --- /dev/null +++ b/alr_envs/dmc/__init__.py @@ -0,0 +1,385 @@ +from . import manipulation, suite + +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} + +from gym.envs.registration import register + +# DeepMind Control Suite (DMC) + +register( + id=f'dmc_ball_in_cup-catch_dmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"ball_in_cup-catch", + "time_limit": 20, + "episode_length": 1000, + "wrappers": [suite.ball_in_cup.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 5, + "duration": 20, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 50, + "d_gains": 1 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_ball_in_cup-catch_dmp-v0") + +register( + id=f'dmc_ball_in_cup-catch_detpmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"ball_in_cup-catch", + "time_limit": 20, + "episode_length": 1000, + "wrappers": [suite.ball_in_cup.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 5, + "duration": 20, + "width": 0.025, + "policy_type": "motor", + "zero_start": True, + "policy_kwargs": { + "p_gains": 50, + "d_gains": 1 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_ball_in_cup-catch_detpmp-v0") + +register( + id=f'dmc_reacher-easy_dmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"reacher-easy", + "time_limit": 20, + "episode_length": 1000, + "wrappers": [suite.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 5, + "duration": 20, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", + "weights_scale": 50, + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 50, + "d_gains": 1 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-easy_dmp-v0") + +register( + id=f'dmc_reacher-easy_detpmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"reacher-easy", + "time_limit": 20, + "episode_length": 1000, + "wrappers": [suite.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 5, + "duration": 20, + "width": 0.025, + "policy_type": "motor", + "weights_scale": 0.2, + "zero_start": True, + "policy_kwargs": { + "p_gains": 50, + "d_gains": 1 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_reacher-easy_detpmp-v0") + +register( + id=f'dmc_reacher-hard_dmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"reacher-hard", + "time_limit": 20, + "episode_length": 1000, + "wrappers": [suite.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 5, + "duration": 20, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", + "weights_scale": 50, + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 50, + "d_gains": 1 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_reacher-hard_dmp-v0") + +register( + id=f'dmc_reacher-hard_detpmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"reacher-hard", + "time_limit": 20, + "episode_length": 1000, + "wrappers": [suite.reacher.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 5, + "duration": 20, + "width": 0.025, + "policy_type": "motor", + "weights_scale": 0.2, + "zero_start": True, + "policy_kwargs": { + "p_gains": 50, + "d_gains": 1 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_reacher-hard_detpmp-v0") + +_dmc_cartpole_tasks = ["balance", "balance_sparse", "swingup", "swingup_sparse"] + +for _task in _dmc_cartpole_tasks: + _env_id = f'dmc_cartpole-{_task}_dmp-v0' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"cartpole-{_task}", + # "time_limit": 1, + "camera_id": 0, + "episode_length": 1000, + "wrappers": [suite.cartpole.MPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 5, + "duration": 10, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", + "weights_scale": 50, + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 10, + "d_gains": 10 + } + } + } + ) + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + + _env_id = f'dmc_cartpole-{_task}_detpmp-v0' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"cartpole-{_task}", + # "time_limit": 1, + "camera_id": 0, + "episode_length": 1000, + "wrappers": [suite.cartpole.MPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 5, + "duration": 10, + "width": 0.025, + "policy_type": "motor", + "weights_scale": 0.2, + "zero_start": True, + "policy_kwargs": { + "p_gains": 10, + "d_gains": 10 + } + } + } + ) + ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +_env_id = f'dmc_cartpole-two_poles_dmp-v0' +register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"cartpole-two_poles", + # "time_limit": 1, + "camera_id": 0, + "episode_length": 1000, + "wrappers": [suite.cartpole.TwoPolesMPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 5, + "duration": 10, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", + "weights_scale": 50, + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 10, + "d_gains": 10 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + +_env_id = f'dmc_cartpole-two_poles_detpmp-v0' +register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"cartpole-two_poles", + # "time_limit": 1, + "camera_id": 0, + "episode_length": 1000, + "wrappers": [suite.cartpole.TwoPolesMPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 5, + "duration": 10, + "width": 0.025, + "policy_type": "motor", + "weights_scale": 0.2, + "zero_start": True, + "policy_kwargs": { + "p_gains": 10, + "d_gains": 10 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +_env_id = f'dmc_cartpole-three_poles_dmp-v0' +register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"cartpole-three_poles", + # "time_limit": 1, + "camera_id": 0, + "episode_length": 1000, + "wrappers": [suite.cartpole.ThreePolesMPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 5, + "duration": 10, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", + "weights_scale": 50, + "goal_scale": 0.1, + "policy_kwargs": { + "p_gains": 10, + "d_gains": 10 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + +_env_id = f'dmc_cartpole-three_poles_detpmp-v0' +register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"cartpole-three_poles", + # "time_limit": 1, + "camera_id": 0, + "episode_length": 1000, + "wrappers": [suite.cartpole.ThreePolesMPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 5, + "duration": 10, + "width": 0.025, + "policy_type": "motor", + "weights_scale": 0.2, + "zero_start": True, + "policy_kwargs": { + "p_gains": 10, + "d_gains": 10 + } + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +# DeepMind Manipulation + +register( + id=f'dmc_manipulation-reach_site_dmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_dmp_env_helper', + # max_episode_steps=1, + kwargs={ + "name": f"manipulation-reach_site_features", + # "time_limit": 1, + "episode_length": 250, + "wrappers": [manipulation.reach_site.MPWrapper], + "mp_kwargs": { + "num_dof": 9, + "num_basis": 5, + "duration": 10, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1, + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("dmc_manipulation-reach_site_dmp-v0") + +register( + id=f'dmc_manipulation-reach_site_detpmp-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": f"manipulation-reach_site_features", + # "time_limit": 1, + "episode_length": 250, + "wrappers": [manipulation.reach_site.MPWrapper], + "mp_kwargs": { + "num_dof": 9, + "num_basis": 5, + "duration": 10, + "width": 0.025, + "policy_type": "velocity", + "weights_scale": 0.2, + "zero_start": True, + } + } +) +ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("dmc_manipulation-reach_site_detpmp-v0") diff --git a/alr_envs/dmc/dmc_wrapper.py b/alr_envs/dmc/dmc_wrapper.py new file mode 100644 index 0000000..10f1af9 --- /dev/null +++ b/alr_envs/dmc/dmc_wrapper.py @@ -0,0 +1,206 @@ +# Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/wrappers.py +# License: MIT +# Copyright (c) 2020 Denis Yarats +import collections +from typing import Any, Dict, Tuple + +import numpy as np +from dm_control import manipulation, suite +from dm_env import specs +from gym import core, spaces + + +def _spec_to_box(spec): + def extract_min_max(s): + assert s.dtype == np.float64 or s.dtype == np.float32, f"Only float64 and float32 types are allowed, instead {s.dtype} was found" + dim = int(np.prod(s.shape)) + if type(s) == specs.Array: + bound = np.inf * np.ones(dim, dtype=np.float32) + return -bound, bound + elif type(s) == specs.BoundedArray: + zeros = np.zeros(dim, dtype=np.float32) + return s.minimum + zeros, s.maximum + zeros + + mins, maxs = [], [] + for s in spec: + mn, mx = extract_min_max(s) + mins.append(mn) + maxs.append(mx) + low = np.concatenate(mins, axis=0) + high = np.concatenate(maxs, axis=0) + assert low.shape == high.shape + return spaces.Box(low, high, dtype=np.float32) + + +def _flatten_obs(obs: collections.MutableMapping): + """ + Flattens an observation of type MutableMapping, e.g. a dict to a 1D array. + Args: + obs: observation to flatten + + Returns: 1D array of observation + + """ + + if not isinstance(obs, collections.MutableMapping): + raise ValueError(f'Requires dict-like observations structure. {type(obs)} found.') + + # Keep key order consistent for non OrderedDicts + keys = obs.keys() if isinstance(obs, collections.OrderedDict) else sorted(obs.keys()) + + obs_vals = [np.array([obs[key]]) if np.isscalar(obs[key]) else obs[key].ravel() for key in keys] + return np.concatenate(obs_vals) + + +class DMCWrapper(core.Env): + def __init__( + self, + domain_name: str, + task_name: str, + task_kwargs: dict = {}, + visualize_reward: bool = True, + from_pixels: bool = False, + height: int = 84, + width: int = 84, + camera_id: int = 0, + frame_skip: int = 1, + environment_kwargs: dict = None, + channels_first: bool = True + ): + assert 'random' in task_kwargs, 'Please specify a seed for deterministic behavior.' + self._from_pixels = from_pixels + self._height = height + self._width = width + self._camera_id = camera_id + self._frame_skip = frame_skip + self._channels_first = channels_first + + # create task + if domain_name == "manipulation": + assert not from_pixels and not task_name.endswith("_vision"), \ + "TODO: Vision interface for manipulation is different to suite and needs to be implemented" + self._env = manipulation.load(environment_name=task_name, seed=task_kwargs['random']) + else: + self._env = suite.load(domain_name=domain_name, task_name=task_name, task_kwargs=task_kwargs, + visualize_reward=visualize_reward, environment_kwargs=environment_kwargs) + + # action and observation space + self._action_space = _spec_to_box([self._env.action_spec()]) + self._observation_space = _spec_to_box(self._env.observation_spec().values()) + + self._last_state = None + self.viewer = None + + # set seed + self.seed(seed=task_kwargs.get('random', 1)) + + def __getattr__(self, item): + """Propagate only non-existent properties to wrapped env.""" + if item.startswith('_'): + raise AttributeError("attempted to get missing private attribute '{}'".format(item)) + if item in self.__dict__: + return getattr(self, item) + return getattr(self._env, item) + + def _get_obs(self, time_step): + if self._from_pixels: + obs = self.render( + mode="rgb_array", + height=self._height, + width=self._width, + camera_id=self._camera_id + ) + if self._channels_first: + obs = obs.transpose(2, 0, 1).copy() + else: + obs = _flatten_obs(time_step.observation) + return obs + + @property + def observation_space(self): + return self._observation_space + + @property + def action_space(self): + return self._action_space + + @property + def dt(self): + return self._env.control_timestep() * self._frame_skip + + @property + def base_step_limit(self): + """ + Returns: max_episode_steps of the underlying DMC env + + """ + # Accessing private attribute because DMC does not expose time_limit or step_limit. + # Only the current time_step/time as well as the control_timestep can be accessed. + try: + return (self._env._step_limit + self._frame_skip - 1) // self._frame_skip + except AttributeError as e: + return self._env._time_limit / self.dt + + def seed(self, seed=None): + self._action_space.seed(seed) + self._observation_space.seed(seed) + + def step(self, action) -> Tuple[np.ndarray, float, bool, Dict[str, Any]]: + assert self._action_space.contains(action) + reward = 0 + extra = {'internal_state': self._env.physics.get_state().copy()} + + for _ in range(self._frame_skip): + time_step = self._env.step(action) + reward += time_step.reward or 0. + done = time_step.last() + if done: + break + + self._last_state = _flatten_obs(time_step.observation) + obs = self._get_obs(time_step) + extra['discount'] = time_step.discount + return obs, reward, done, extra + + def reset(self) -> np.ndarray: + time_step = self._env.reset() + self._last_state = _flatten_obs(time_step.observation) + obs = self._get_obs(time_step) + return obs + + def render(self, mode='rgb_array', height=None, width=None, camera_id=0): + if self._last_state is None: + raise ValueError('Environment not ready to render. Call reset() first.') + + camera_id = camera_id or self._camera_id + + # assert mode == 'rgb_array', 'only support rgb_array mode, given %s' % mode + if mode == "rgb_array": + height = height or self._height + width = width or self._width + return self._env.physics.render(height=height, width=width, camera_id=camera_id) + + elif mode == 'human': + if self.viewer is None: + # pylint: disable=import-outside-toplevel + # pylint: disable=g-import-not-at-top + from gym.envs.classic_control import rendering + self.viewer = rendering.SimpleImageViewer() + # Render max available buffer size. Larger is only possible by altering the XML. + img = self._env.physics.render(height=self._env.physics.model.vis.global_.offheight, + width=self._env.physics.model.vis.global_.offwidth, + camera_id=camera_id) + self.viewer.imshow(img) + return self.viewer.isopen + + def close(self): + super().close() + if self.viewer is not None and self.viewer.isopen: + self.viewer.close() + + @property + def reward_range(self) -> Tuple[float, float]: + reward_spec = self._env.reward_spec() + if isinstance(reward_spec, specs.BoundedArray): + return reward_spec.minimum, reward_spec.maximum + return -float('inf'), float('inf') diff --git a/alr_envs/dmc/manipulation/__init__.py b/alr_envs/dmc/manipulation/__init__.py new file mode 100644 index 0000000..f3a4147 --- /dev/null +++ b/alr_envs/dmc/manipulation/__init__.py @@ -0,0 +1 @@ +from . import reach_site diff --git a/alr_envs/dmc/manipulation/reach_site/__init__.py b/alr_envs/dmc/manipulation/reach_site/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/dmc/manipulation/reach_site/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py new file mode 100644 index 0000000..2d03f7b --- /dev/null +++ b/alr_envs/dmc/manipulation/reach_site/mp_wrapper.py @@ -0,0 +1,38 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # Joint and target positions are randomized, velocities are always set to 0. + return np.hstack([ + [True] * 3, # target position + [True] * 12, # sin/cos arm joint position + [True] * 6, # arm joint torques + [False] * 6, # arm joint velocities + [True] * 3, # sin/cos hand joint position + [False] * 3, # hand joint velocities + [True] * 3, # hand pinch site position + [True] * 9, # pinch site rmat + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.env.physics.named.data.qpos[:] + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.physics.named.data.qvel[:] + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/dmc/suite/__init__.py b/alr_envs/dmc/suite/__init__.py new file mode 100644 index 0000000..7d206fc --- /dev/null +++ b/alr_envs/dmc/suite/__init__.py @@ -0,0 +1 @@ +from . import cartpole, ball_in_cup, reacher diff --git a/alr_envs/dmc/suite/ball_in_cup/__init__.py b/alr_envs/dmc/suite/ball_in_cup/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/dmc/suite/ball_in_cup/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py new file mode 100644 index 0000000..fb068b3 --- /dev/null +++ b/alr_envs/dmc/suite/ball_in_cup/mp_wrapper.py @@ -0,0 +1,34 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # Besides the ball position, the environment is always set to 0. + return np.hstack([ + [False] * 2, # cup position + [True] * 2, # ball position + [False] * 2, # cup velocity + [False] * 2, # ball velocity + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.hstack([self.env.physics.named.data.qpos['cup_x'], self.env.physics.named.data.qpos['cup_z']]) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return np.hstack([self.env.physics.named.data.qvel['cup_x'], self.env.physics.named.data.qvel['cup_z']]) + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/dmc/suite/cartpole/__init__.py b/alr_envs/dmc/suite/cartpole/__init__.py new file mode 100644 index 0000000..c5f9bee --- /dev/null +++ b/alr_envs/dmc/suite/cartpole/__init__.py @@ -0,0 +1,3 @@ +from .mp_wrapper import MPWrapper +from .mp_wrapper import TwoPolesMPWrapper +from .mp_wrapper import ThreePolesMPWrapper diff --git a/alr_envs/dmc/suite/cartpole/mp_wrapper.py b/alr_envs/dmc/suite/cartpole/mp_wrapper.py new file mode 100644 index 0000000..1ca99f5 --- /dev/null +++ b/alr_envs/dmc/suite/cartpole/mp_wrapper.py @@ -0,0 +1,51 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + def __init__(self, env, n_poles: int = 1): + self.n_poles = n_poles + super().__init__(env) + + + @property + def active_obs(self): + # Besides the ball position, the environment is always set to 0. + return np.hstack([ + [True], # slider position + [True] * 2 * self.n_poles, # sin/cos hinge angles + [True], # slider velocity + [True] * self.n_poles, # hinge velocities + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.env.physics.named.data.qpos["slider"] + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.physics.named.data.qvel["slider"] + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt + + +class TwoPolesMPWrapper(MPWrapper): + + def __init__(self, env): + super().__init__(env, n_poles=2) + + +class ThreePolesMPWrapper(MPWrapper): + + def __init__(self, env): + super().__init__(env, n_poles=3) diff --git a/alr_envs/dmc/suite/reacher/__init__.py b/alr_envs/dmc/suite/reacher/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/dmc/suite/reacher/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/dmc/suite/reacher/mp_wrapper.py b/alr_envs/dmc/suite/reacher/mp_wrapper.py new file mode 100644 index 0000000..86bc992 --- /dev/null +++ b/alr_envs/dmc/suite/reacher/mp_wrapper.py @@ -0,0 +1,33 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + # Joint and target positions are randomized, velocities are always set to 0. + return np.hstack([ + [True] * 2, # joint position + [True] * 2, # target position + [False] * 2, # joint velocity + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.env.physics.named.data.qpos[:] + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + return self.env.physics.named.data.qvel[:] + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/stochastic_search/functions/__init__.py b/alr_envs/examples/__init__.py similarity index 100% rename from alr_envs/stochastic_search/functions/__init__.py rename to alr_envs/examples/__init__.py diff --git a/alr_envs/examples/examples_dmc.py b/alr_envs/examples/examples_dmc.py new file mode 100644 index 0000000..2d310c4 --- /dev/null +++ b/alr_envs/examples/examples_dmc.py @@ -0,0 +1,135 @@ +import alr_envs + + +def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): + """ + Example for running a DMC based env in the step based setting. + The env_id has to be specified as `domain_name-task_name` or + for manipulation tasks as `manipulation-environment_name` + + Args: + env_id: Either `domain_name-task_name` or `manipulation-environment_name` + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + env = alr_envs.make(env_id, seed) + rewards = 0 + obs = env.reset() + print("observation shape:", env.observation_space.shape) + print("action shape:", env.action_space.shape) + + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if render: + env.render("human") + + if done: + print(env_id, rewards) + rewards = 0 + obs = env.reset() + + env.close() + del env + + +def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): + """ + Example for running a custom motion primitive based environments. + Our already registered environments follow the same structure. + Hence, this also allows to adjust hyperparameters of the motion primitives. + Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/alr_envs/ + Args: + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + + # Base DMC name, according to structure of above example + base_env = "ball_in_cup-catch" + + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. + # You can also add other gym.Wrappers in case they are needed. + wrappers = [alr_envs.dmc.suite.ball_in_cup.MPWrapper] + mp_kwargs = { + "num_dof": 2, # degrees of fredom a.k.a. the old action space dimensionality + "num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis + "duration": 20, # length of trajectory in s, number of steps = duration / dt + "learn_goal": True, # learn the goal position (recommended) + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "motor", # controller type, 'velocity', 'position', and 'motor' (torque control) + "weights_scale": 1, # scaling of MP weights + "goal_scale": 1, # scaling of learned goal position + "policy_kwargs": { # only required for torque control/PD-Controller + "p_gains": 0.2, + "d_gains": 0.05 + } + } + kwargs = { + "time_limit": 20, # same as duration value but as max horizon for underlying DMC environment + "episode_length": 1000, # corresponding number of episode steps + # "frame_skip": 1 + } + env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs) + # OR for a deterministic ProMP (other mp_kwargs are required, see metaworld_examples): + # env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_args) + + # This renders the full MP trajectory + # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + # Resetting to no rendering, can be achieved by render(mode=None). + # It is also possible to change them mode multiple times when + # e.g. only every nth trajectory should be displayed. + if render: + env.render(mode="human") + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(base_env, rewards) + rewards = 0 + obs = env.reset() + + env.close() + del env + + +if __name__ == '__main__': + # Disclaimer: DMC environments require the seed to be specified in the beginning. + # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics. + + # For rendering DMC + # export MUJOCO_GL="osmesa" + render = False + + # # Standard DMC Suite tasks + example_dmc("fish-swim", seed=10, iterations=1000, render=render) + + # Manipulation tasks + # Disclaimer: The vision versions are currently not integrated and yield an error + example_dmc("manipulation-reach_site_features", seed=10, iterations=250, render=render) + + # Gym + DMC hybrid task provided in the MP framework + example_dmc("dmc_ball_in_cup-catch_detpmp-v0", seed=10, iterations=1, render=render) + + # Custom DMC task + # Different seed, because the episode is longer for this example and the name+seed combo is already registered above + example_custom_dmc_and_mp(seed=11, iterations=1, render=render) diff --git a/alr_envs/examples/examples_general.py b/alr_envs/examples/examples_general.py new file mode 100644 index 0000000..d043d6d --- /dev/null +++ b/alr_envs/examples/examples_general.py @@ -0,0 +1,102 @@ +from collections import defaultdict + +import gym +import numpy as np + +import alr_envs + + +def example_general(env_id="Pendulum-v0", seed=1, iterations=1000, render=True): + """ + Example for running any env in the step based setting. + This also includes DMC environments when leveraging our custom make_env function. + + Args: + env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + + env = alr_envs.make(env_id, seed) + rewards = 0 + obs = env.reset() + print("Observation shape: ", env.observation_space.shape) + print("Action shape: ", env.action_space.shape) + + # number of environment steps + for i in range(iterations): + obs, reward, done, info = env.step(env.action_space.sample()) + rewards += reward + + if render: + env.render() + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=4, seed=int('533D', 16), n_samples=800): + """ + Example for running any env in a vectorized multiprocessing setting to generate more samples faster. + This also includes DMC and DMP environments when leveraging our custom make_env function. + Be aware, increasing the number of environments reduces the total length of the individual episodes. + + Args: + env_id: OpenAI/Custom gym task id or either `domain_name-task_name` or `manipulation-environment_name` for DMC tasks + seed: seed for deterministic behaviour + n_cpu: Number of cpus cores to use in parallel + n_samples: number of samples generated in total by all environments. + + Returns: Tuple of (obs, reward, done, info) with type np.ndarray + + """ + env = gym.vector.AsyncVectorEnv([alr_envs.make_rank(env_id, seed, i) for i in range(n_cpu)]) + # OR + # envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed + i) for i in range(n_cpu)]) + + # for plotting + rewards = np.zeros(n_cpu) + buffer = defaultdict(list) + + obs = env.reset() + + # this would generate more samples than requested if n_samples % num_envs != 0 + repeat = int(np.ceil(n_samples / env.num_envs)) + for i in range(repeat): + obs, reward, done, info = env.step(env.action_space.sample()) + buffer['obs'].append(obs) + buffer['reward'].append(reward) + buffer['done'].append(done) + buffer['info'].append(info) + rewards += reward + if np.any(done): + print(f"Reward at iteration {i}: {rewards[done]}") + rewards[done] = 0 + + # do not return values above threshold + return (*map(lambda v: np.stack(v)[:n_samples], buffer.values()),) + + +if __name__ == '__main__': + render = True + + # Basic gym task + example_general("Pendulum-v0", seed=10, iterations=200, render=render) + + # # Basis task from framework + example_general("alr_envs:HoleReacher-v0", seed=10, iterations=200, render=render) + + # # OpenAI Mujoco task + example_general("HalfCheetah-v2", seed=10, render=render) + + # # Mujoco task from framework + example_general("alr_envs:ALRReacher-v0", seed=10, iterations=200, render=render) + + # Vectorized multiprocessing environments + example_async(env_id="alr_envs:HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) diff --git a/alr_envs/examples/examples_metaworld.py b/alr_envs/examples/examples_metaworld.py new file mode 100644 index 0000000..e88ed6c --- /dev/null +++ b/alr_envs/examples/examples_metaworld.py @@ -0,0 +1,128 @@ +import alr_envs + + +def example_dmc(env_id="fish-swim", seed=1, iterations=1000, render=True): + """ + Example for running a MetaWorld based env in the step based setting. + The env_id has to be specified as `task_name-v2`. V1 versions are not supported and we always + return the observable goal version. + All tasks can be found here: https://arxiv.org/pdf/1910.10897.pdf or https://meta-world.github.io/ + + Args: + env_id: `task_name-v2` + seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code) + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + env = alr_envs.make(env_id, seed) + rewards = 0 + obs = env.reset() + print("observation shape:", env.observation_space.shape) + print("action shape:", env.action_space.shape) + + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if render: + # THIS NEEDS TO BE SET TO FALSE FOR NOW, BECAUSE THE INTERFACE FOR RENDERING IS DIFFERENT TO BASIC GYM + # TODO: Remove this, when Metaworld fixes its interface. + env.render(False) + + if done: + print(env_id, rewards) + rewards = 0 + obs = env.reset() + + env.close() + del env + + +def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): + """ + Example for running a custom motion primitive based environments. + Our already registered environments follow the same structure. + Hence, this also allows to adjust hyperparameters of the motion primitives. + Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/alr_envs/ + Args: + seed: seed for deterministic behaviour (TODO: currently not working due to an issue in MetaWorld code) + iterations: Number of rollout steps to run + render: Render the episode (TODO: currently not working due to an issue in MetaWorld code) + + Returns: + + """ + + # Base MetaWorld name, according to structure of above example + base_env = "button-press-v2" + + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. + # You can also add other gym.Wrappers in case they are needed. + wrappers = [alr_envs.meta.goal_and_object_change.MPWrapper] + mp_kwargs = { + "num_dof": 4, # degrees of fredom a.k.a. the old action space dimensionality + "num_basis": 5, # number of basis functions, the new action space has size num_dof x num_basis + "duration": 6.25, # length of trajectory in s, number of steps = duration / dt + "post_traj_time": 0, # pad trajectory with additional zeros at the end (recommended: 0) + "width": 0.025, # width of the basis functions + "zero_start": True, # start from current environment position if True + "weights_scale": 1, # scaling of MP weights + "policy_type": "metaworld", # custom controller type for metaworld environments + } + + env = alr_envs.make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) + # OR for a DMP (other mp_kwargs are required, see dmc_examples): + # env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs, **kwargs) + + # This renders the full MP trajectory + # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + # Resetting to no rendering, can be achieved by render(mode=None). + # It is also possible to change them mode multiple times when + # e.g. only every nth trajectory should be displayed. + if render: + raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. " + "A temporary workaround is to alter their code in MujocoEnv render() from " + "`if not offscreen` to `if not offscreen or offscreen == 'human'`.") + # TODO: Remove this, when Metaworld fixes its interface. + # env.render(mode="human") + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(base_env, rewards) + rewards = 0 + obs = env.reset() + + env.close() + del env + + +if __name__ == '__main__': + # Disclaimer: MetaWorld environments require the seed to be specified in the beginning. + # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior. + + # For rendering it might be necessary to specify your OpenGL installation + # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so + render = False + + # # Standard DMC Suite tasks + example_dmc("button-press-v2", seed=10, iterations=500, render=render) + + # MP + MetaWorld hybrid task provided in the our framework + example_dmc("ButtonPressDetPMP-v2", seed=10, iterations=1, render=render) + + # Custom MetaWorld task + example_custom_dmc_and_mp(seed=10, iterations=1, render=render) diff --git a/alr_envs/examples/examples_motion_primitives.py b/alr_envs/examples/examples_motion_primitives.py new file mode 100644 index 0000000..6decdb1 --- /dev/null +++ b/alr_envs/examples/examples_motion_primitives.py @@ -0,0 +1,161 @@ +import alr_envs + + +def example_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True): + """ + Example for running a motion primitive based environment, which is already registered + Args: + env_name: DMP env_id + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + # While in this case gym.make() is possible to use as well, we recommend our custom make env function. + # First, it already takes care of seeding and second enables the use of DMC tasks within the gym interface. + env = alr_envs.make(env_name, seed) + + rewards = 0 + # env.render(mode=None) + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + + if render and i % 2 == 0: + # This renders the full MP trajectory + # It is only required to call render() once in the beginning, which renders every consecutive trajectory. + # Resetting to no rendering, can be achieved by render(mode=None). + # It is also possible to change the mode multiple times when + # e.g. only every second trajectory should be displayed, such as here + # Just make sure the correct mode is set before executing the step. + env.render(mode="human") + else: + env.render(mode=None) + + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_custom_mp(env_name="alr_envs:HoleReacherDMP-v1", seed=1, iterations=1, render=True): + """ + Example for running a motion primitive based environment, which is already registered + Args: + env_name: DMP env_id + seed: seed for deterministic behaviour + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + # Changing the mp_kwargs is possible by providing them to gym. + # E.g. here by providing way to many basis functions + mp_kwargs = { + "num_dof": 5, + "num_basis": 1000, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + env = alr_envs.make(env_name, seed, mp_kwargs=mp_kwargs) + + # This time rendering every trajectory + if render: + env.render(mode="human") + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +def example_fully_custom_mp(seed=1, iterations=1, render=True): + """ + Example for running a custom motion primitive based environments. + Our already registered environments follow the same structure. + Hence, this also allows to adjust hyperparameters of the motion primitives. + Yet, we recommend the method above if you are just interested in chaining those parameters for existing tasks. + We appreciate PRs for custom environments (especially MP wrappers of existing tasks) + for our repo: https://github.com/ALRhub/alr_envs/ + Args: + seed: seed + iterations: Number of rollout steps to run + render: Render the episode + + Returns: + + """ + + base_env = "alr_envs:HoleReacher-v1" + + # Replace this wrapper with the custom wrapper for your environment by inheriting from the MPEnvWrapper. + # You can also add other gym.Wrappers in case they are needed. + wrappers = [alr_envs.classic_control.hole_reacher.MPWrapper] + mp_kwargs = { + "num_dof": 5, + "num_basis": 5, + "duration": 2, + "learn_goal": True, + "alpha_phase": 2, + "bandwidth_factor": 2, + "policy_type": "velocity", + "weights_scale": 50, + "goal_scale": 0.1 + } + env = alr_envs.make_dmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) + # OR for a deterministic ProMP: + # env = make_detpmp_env(base_env, wrappers=wrappers, seed=seed, mp_kwargs=mp_kwargs) + + if render: + env.render(mode="human") + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(iterations): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +if __name__ == '__main__': + render = False + # DMP + example_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + + # DetProMP + example_mp("alr_envs:HoleReacherDetPMP-v1", seed=10, iterations=1, render=render) + + # Altered basis functions + example_custom_mp("alr_envs:HoleReacherDMP-v1", seed=10, iterations=1, render=render) + + # Custom MP + example_fully_custom_mp(seed=10, iterations=1, render=render) diff --git a/alr_envs/examples/examples_open_ai.py b/alr_envs/examples/examples_open_ai.py new file mode 100644 index 0000000..9f90be5 --- /dev/null +++ b/alr_envs/examples/examples_open_ai.py @@ -0,0 +1,41 @@ +import alr_envs + + +def example_mp(env_name, seed=1): + """ + Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered. + For more information on motion primitive specific stuff, look at the mp examples. + Args: + env_name: DetPMP env_id + seed: seed + + Returns: + + """ + # While in this case gym.make() is possible to use as well, we recommend our custom make env function. + env = alr_envs.make(env_name, seed) + + rewards = 0 + obs = env.reset() + + # number of samples/full trajectories (multiple environment steps) + for i in range(10): + ac = env.action_space.sample() + obs, reward, done, info = env.step(ac) + rewards += reward + + if done: + print(rewards) + rewards = 0 + obs = env.reset() + + +if __name__ == '__main__': + # DMP - not supported yet + # example_mp("ReacherDMP-v2") + + # DetProMP + example_mp("ContinuousMountainCarDetPMP-v0") + example_mp("ReacherDetPMP-v2") + example_mp("FetchReachDenseDetPMP-v1") + example_mp("FetchSlideDenseDetPMP-v1") diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py new file mode 100644 index 0000000..3fb8251 --- /dev/null +++ b/alr_envs/examples/pd_control_gain_tuning.py @@ -0,0 +1,72 @@ +import numpy as np +from matplotlib import pyplot as plt + +from alr_envs import dmc, meta +from alr_envs.utils.make_env_helpers import make_detpmp_env + +# This might work for some environments, however, please verify either way the correct trajectory information +# for your environment are extracted below +SEED = 10 +env_id = "ball_in_cup-catch" +wrappers = [dmc.ball_in_cup.MPWrapper] + +mp_kwargs = { + "num_dof": 2, + "num_basis": 10, + "duration": 2, + "width": 0.025, + "policy_type": "motor", + "weights_scale": 1, + "zero_start": True, + "policy_kwargs": { + "p_gains": 1, + "d_gains": 1 + } +} + +kwargs = dict(time_limit=2, episode_length=100) + +env = make_detpmp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, + **kwargs) + +# Plot difference between real trajectory and target MP trajectory +env.reset() +pos, vel = env.mp_rollout(env.action_space.sample()) + +base_shape = env.full_action_space.shape +actual_pos = np.zeros((len(pos), *base_shape)) +actual_vel = np.zeros((len(pos), *base_shape)) +act = np.zeros((len(pos), *base_shape)) + +for t, pos_vel in enumerate(zip(pos, vel)): + actions = env.policy.get_action(pos_vel[0], pos_vel[1]) + actions = np.clip(actions, env.full_action_space.low, env.full_action_space.high) + _, _, _, _ = env.env.step(actions) + act[t, :] = actions + # TODO verify for your environment + actual_pos[t, :] = env.current_pos + actual_vel[t, :] = env.current_vel + +plt.figure(figsize=(15, 5)) + +plt.subplot(131) +plt.title("Position") +plt.plot(actual_pos, c='C0', label=["true" if i == 0 else "" for i in range(np.prod(base_shape))]) +# plt.plot(actual_pos_ball, label="true pos ball") +plt.plot(pos, c='C1', label=["MP" if i == 0 else "" for i in range(np.prod(base_shape))]) +plt.xlabel("Episode steps") +plt.legend() + +plt.subplot(132) +plt.title("Velocity") +plt.plot(actual_vel, c='C0', label=[f"true" if i == 0 else "" for i in range(np.prod(base_shape))]) +plt.plot(vel, c='C1', label=[f"MP" if i == 0 else "" for i in range(np.prod(base_shape))]) +plt.xlabel("Episode steps") +plt.legend() + +plt.subplot(133) +plt.title("Actions") +plt.plot(act, c="C0"), # label=[f"actions" if i == 0 else "" for i in range(np.prod(base_action_shape))]) +plt.xlabel("Episode steps") +# plt.legend() +plt.show() diff --git a/alr_envs/meta/README.MD b/alr_envs/meta/README.MD new file mode 100644 index 0000000..c8d9cd1 --- /dev/null +++ b/alr_envs/meta/README.MD @@ -0,0 +1,26 @@ +# MetaWorld Wrappers + +These are the Environment Wrappers for selected [Metaworld](https://meta-world.github.io/) environments in order to use our Motion Primitive gym interface with them. +All Metaworld environments have a 39 dimensional observation space with the same structure. The tasks differ only in the objective and the initial observations that are randomized. +Unused observations are zeroed out. E.g. for `Button-Press-v2` the observation mask looks the following: +```python + return np.hstack([ + # Current observation + [False] * 3, # end-effector position + [False] * 1, # normalized gripper open distance + [True] * 3, # main object position + [False] * 4, # main object quaternion + [False] * 3, # secondary object position + [False] * 4, # secondary object quaternion + # Previous observation + [False] * 3, # previous end-effector position + [False] * 1, # previous normalized gripper open distance + [False] * 3, # previous main object position + [False] * 4, # previous main object quaternion + [False] * 3, # previous second object position + [False] * 4, # previous second object quaternion + # Goal + [True] * 3, # goal position + ]) +``` +For other tasks only the boolean values have to be adjusted accordingly. \ No newline at end of file diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py new file mode 100644 index 0000000..fa63c94 --- /dev/null +++ b/alr_envs/meta/__init__.py @@ -0,0 +1,115 @@ +from gym import register + +from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ + object_change_mp_wrapper + +ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} + +# MetaWorld + +_goal_change_envs = ["assembly-v2", "pick-out-of-hole-v2", "plate-slide-v2", "plate-slide-back-v2", + "plate-slide-side-v2", "plate-slide-back-side-v2"] +for _task in _goal_change_envs: + task_id_split = _task.split("-") + name = "".join([s.capitalize() for s in task_id_split[:-1]]) + _env_id = f'{name}DetPMP-{task_id_split[-1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": _task, + "wrappers": [goal_change_mp_wrapper.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 6.25, + "post_traj_time": 0, + "width": 0.025, + "zero_start": True, + "policy_type": "metaworld", + } + } + ) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +_object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"] +for _task in _object_change_envs: + task_id_split = _task.split("-") + name = "".join([s.capitalize() for s in task_id_split[:-1]]) + _env_id = f'{name}DetPMP-{task_id_split[-1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": _task, + "wrappers": [object_change_mp_wrapper.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 6.25, + "post_traj_time": 0, + "width": 0.025, + "zero_start": True, + "policy_type": "metaworld", + } + } + ) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +_goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2", + "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2", + "coffee-push-v2", "dial-turn-v2", "disassemble-v2", "door-close-v2", + "door-lock-v2", "door-open-v2", "door-unlock-v2", "hand-insert-v2", + "drawer-close-v2", "drawer-open-v2", "faucet-open-v2", "faucet-close-v2", + "handle-press-side-v2", "handle-press-v2", "handle-pull-side-v2", + "handle-pull-v2", "lever-pull-v2", "peg-insert-side-v2", "pick-place-wall-v2", + "reach-v2", "push-back-v2", "push-v2", "pick-place-v2", "peg-unplug-side-v2", + "soccer-v2", "stick-push-v2", "stick-pull-v2", "push-wall-v2", "reach-wall-v2", + "shelf-place-v2", "sweep-v2", "window-open-v2", "window-close-v2" + ] +for _task in _goal_and_object_change_envs: + task_id_split = _task.split("-") + name = "".join([s.capitalize() for s in task_id_split[:-1]]) + _env_id = f'{name}DetPMP-{task_id_split[-1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": _task, + "wrappers": [goal_object_change_mp_wrapper.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 6.25, + "post_traj_time": 0, + "width": 0.025, + "zero_start": True, + "policy_type": "metaworld", + } + } + ) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) + +_goal_and_endeffector_change_envs = ["basketball-v2"] +for _task in _goal_and_endeffector_change_envs: + task_id_split = _task.split("-") + name = "".join([s.capitalize() for s in task_id_split[:-1]]) + _env_id = f'{name}DetPMP-{task_id_split[-1]}' + register( + id=_env_id, + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": _task, + "wrappers": [goal_endeffector_change_mp_wrapper.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 6.25, + "post_traj_time": 0, + "width": 0.025, + "zero_start": True, + "policy_type": "metaworld", + } + } + ) + ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append(_env_id) diff --git a/alr_envs/meta/goal_change_mp_wrapper.py b/alr_envs/meta/goal_change_mp_wrapper.py new file mode 100644 index 0000000..a558365 --- /dev/null +++ b/alr_envs/meta/goal_change_mp_wrapper.py @@ -0,0 +1,68 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + """ + This Wrapper is for environments where merely the goal changes in the beginning + and no secondary objects or end effectors are altered at the start of an episode. + You can verify this by executing the code below for your environment id and check if the output is non-zero + at the same indices. + ```python + import alr_envs + env = alr_envs.make(env_id, 1) + print(env.reset() - env.reset()) + array([ 0. , 0. , 0. , 0. , 0, + 0 , 0 , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0 , 0 , 0 , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , !=0 , !=0 , !=0]) + ``` + """ + + @property + def active_obs(self): + # This structure is the same for all metaworld environments. + # Only the observations which change could differ + return np.hstack([ + # Current observation + [False] * 3, # end-effector position + [False] * 1, # normalized gripper open distance + [False] * 3, # main object position + [False] * 4, # main object quaternion + [False] * 3, # secondary object position + [False] * 4, # secondary object quaternion + # Previous observation + # TODO: Include previous values? According to their source they might be wrong for the first iteration. + [False] * 3, # previous end-effector position + [False] * 1, # previous normalized gripper open distance + [False] * 3, # previous main object position + [False] * 4, # previous main object quaternion + [False] * 3, # previous second object position + [False] * 4, # previous second object quaternion + # Goal + [True] * 3, # goal position + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + r_close = self.env.data.get_joint_qpos("r_close") + return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close]) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + raise NotImplementedError("Velocity cannot be retrieved.") + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/meta/goal_endeffector_change_mp_wrapper.py b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py new file mode 100644 index 0000000..8912a72 --- /dev/null +++ b/alr_envs/meta/goal_endeffector_change_mp_wrapper.py @@ -0,0 +1,68 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + """ + This Wrapper is for environments where merely the goal changes in the beginning + and no secondary objects or end effectors are altered at the start of an episode. + You can verify this by executing the code below for your environment id and check if the output is non-zero + at the same indices. + ```python + import alr_envs + env = alr_envs.make(env_id, 1) + print(env.reset() - env.reset()) + array([ !=0 , !=0 , !=0 , 0. , 0., + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , !=0 , !=0 , + !=0 , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , !=0 , !=0 , !=0]) + ``` + """ + + @property + def active_obs(self): + # This structure is the same for all metaworld environments. + # Only the observations which change could differ + return np.hstack([ + # Current observation + [True] * 3, # end-effector position + [False] * 1, # normalized gripper open distance + [False] * 3, # main object position + [False] * 4, # main object quaternion + [False] * 3, # secondary object position + [False] * 4, # secondary object quaternion + # Previous observation + # TODO: Include previous values? According to their source they might be wrong for the first iteration. + [False] * 3, # previous end-effector position + [False] * 1, # previous normalized gripper open distance + [False] * 3, # previous main object position + [False] * 4, # previous main object quaternion + [False] * 3, # previous second object position + [False] * 4, # previous second object quaternion + # Goal + [True] * 3, # goal position + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + r_close = self.env.data.get_joint_qpos("r_close") + return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close]) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + raise NotImplementedError("Velocity cannot be retrieved.") + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/meta/goal_object_change_mp_wrapper.py b/alr_envs/meta/goal_object_change_mp_wrapper.py new file mode 100644 index 0000000..63e16b7 --- /dev/null +++ b/alr_envs/meta/goal_object_change_mp_wrapper.py @@ -0,0 +1,68 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + """ + This Wrapper is for environments where merely the goal changes in the beginning + and no secondary objects or end effectors are altered at the start of an episode. + You can verify this by executing the code below for your environment id and check if the output is non-zero + at the same indices. + ```python + import alr_envs + env = alr_envs.make(env_id, 1) + print(env.reset() - env.reset()) + array([ 0. , 0. , 0. , 0. , !=0, + !=0 , !=0 , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , !=0 , !=0 , !=0 , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , !=0 , !=0 , !=0]) + ``` + """ + + @property + def active_obs(self): + # This structure is the same for all metaworld environments. + # Only the observations which change could differ + return np.hstack([ + # Current observation + [False] * 3, # end-effector position + [False] * 1, # normalized gripper open distance + [True] * 3, # main object position + [False] * 4, # main object quaternion + [False] * 3, # secondary object position + [False] * 4, # secondary object quaternion + # Previous observation + # TODO: Include previous values? According to their source they might be wrong for the first iteration. + [False] * 3, # previous end-effector position + [False] * 1, # previous normalized gripper open distance + [False] * 3, # previous main object position + [False] * 4, # previous main object quaternion + [False] * 3, # previous second object position + [False] * 4, # previous second object quaternion + # Goal + [True] * 3, # goal position + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + r_close = self.env.data.get_joint_qpos("r_close") + return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close]) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + raise NotImplementedError("Velocity cannot be retrieved.") + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/meta/object_change_mp_wrapper.py b/alr_envs/meta/object_change_mp_wrapper.py new file mode 100644 index 0000000..4293148 --- /dev/null +++ b/alr_envs/meta/object_change_mp_wrapper.py @@ -0,0 +1,68 @@ +from typing import Tuple, Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + """ + This Wrapper is for environments where merely the goal changes in the beginning + and no secondary objects or end effectors are altered at the start of an episode. + You can verify this by executing the code below for your environment id and check if the output is non-zero + at the same indices. + ```python + import alr_envs + env = alr_envs.make(env_id, 1) + print(env.reset() - env.reset()) + array([ 0. , 0. , 0. , 0. , !=0 , + !=0 , !=0 , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0 , 0 , 0 , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0. , 0. , + 0. , 0. , 0. , 0.]) + ``` + """ + + @property + def active_obs(self): + # This structure is the same for all metaworld environments. + # Only the observations which change could differ + return np.hstack([ + # Current observation + [False] * 3, # end-effector position + [False] * 1, # normalized gripper open distance + [False] * 3, # main object position + [False] * 4, # main object quaternion + [False] * 3, # secondary object position + [False] * 4, # secondary object quaternion + # Previous observation + # TODO: Include previous values? According to their source they might be wrong for the first iteration. + [False] * 3, # previous end-effector position + [False] * 1, # previous normalized gripper open distance + [False] * 3, # previous main object position + [False] * 4, # previous main object quaternion + [False] * 3, # previous second object position + [False] * 4, # previous second object quaternion + # Goal + [True] * 3, # goal position + ]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + r_close = self.env.data.get_joint_qpos("r_close") + return np.hstack([self.env.data.mocap_pos.flatten() / self.env.action_scale, r_close]) + + @property + def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: + raise NotImplementedError("Velocity cannot be retrieved.") + + @property + def goal_pos(self) -> Union[float, int, np.ndarray, Tuple]: + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/mujoco/__init__.py b/alr_envs/mujoco/__init__.py deleted file mode 100644 index eb7b41f..0000000 --- a/alr_envs/mujoco/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from alr_envs.mujoco.reacher.alr_reacher import ALRReacherEnv -from alr_envs.mujoco.balancing import BalancingEnv -from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv diff --git a/alr_envs/mujoco/alr_mujoco_env.py b/alr_envs/mujoco/alr_mujoco_env.py deleted file mode 100644 index 8dbdabb..0000000 --- a/alr_envs/mujoco/alr_mujoco_env.py +++ /dev/null @@ -1,272 +0,0 @@ -from collections import OrderedDict -import os -from abc import abstractmethod - - -from gym import error, spaces -from gym.utils import seeding -import numpy as np -from os import path -import gym - -try: - import mujoco_py -except ImportError as e: - raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e)) - -DEFAULT_SIZE = 500 - - -def convert_observation_to_space(observation): - if isinstance(observation, dict): - space = spaces.Dict(OrderedDict([ - (key, convert_observation_to_space(value)) - for key, value in observation.items() - ])) - elif isinstance(observation, np.ndarray): - low = np.full(observation.shape, -float('inf'), dtype=np.float32) - high = np.full(observation.shape, float('inf'), dtype=np.float32) - space = spaces.Box(low, high, dtype=observation.dtype) - else: - raise NotImplementedError(type(observation), observation) - - return space - - -class AlrMujocoEnv(gym.Env): - """ - Superclass for all MuJoCo environments. - """ - - def __init__(self, model_path, n_substeps, apply_gravity_comp=True): - """ - - Args: - model_path: path to xml file - n_substeps: how many steps mujoco does per call to env.step - use_servo: use actuator defined in xml, use False for direct torque control - """ - if model_path.startswith("/"): - fullpath = model_path - else: - fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) - if not path.exists(fullpath): - raise IOError("File %s does not exist" % fullpath) - self.n_substeps = n_substeps - self.apply_gravity_comp = apply_gravity_comp - self.model = mujoco_py.load_model_from_path(fullpath) - self.sim = mujoco_py.MjSim(self.model, nsubsteps=n_substeps) - self.data = self.sim.data - self.viewer = None - self._viewers = {} - - self.metadata = { - 'render.modes': ['human', 'rgb_array', 'depth_array'], - 'video.frames_per_second': int(np.round(1.0 / self.dt)) - } - - self.init_qpos = self.sim.data.qpos.ravel().copy() - self.init_qvel = self.sim.data.qvel.ravel().copy() - - self._start_pos = None - self._start_vel = None - - self._set_action_space() - - # action = self.action_space.sample() - # observation, _reward, done, _info = self.step(action) - # assert not done - - observation = self._get_obs() # TODO: is calling get_obs enough? should we call reset, or even step? - - self._set_observation_space(observation) - - self.seed() - - @property - def current_pos(self): - """ - By default returns the joint positions of all simulated objects. May be overridden in subclass. - """ - return self.sim.data.qpos - - @property - def current_vel(self): - """ - By default returns the joint velocities of all simulated objects. May be overridden in subclass. - """ - return self.sim.data.qvel - - @property - def start_pos(self): - """ - Start position of the agent, for example joint angles of a Panda robot. Necessary for MP wrapped envs. - """ - return self._start_pos - - @property - def start_vel(self): - """ - Start velocity of the agent. Necessary for MP wrapped envs. - """ - return self._start_vel - - def extend_des_pos(self, des_pos): - """ - In a simplified environment, the actions may only control a subset of all the joints. - Extend the trajectory to match the environments full action space - Args: - des_pos: - - Returns: - - """ - pass - - def extend_des_vel(self, des_vel): - pass - - def _set_action_space(self): - bounds = self.model.actuator_ctrlrange.copy().astype(np.float32) - low, high = bounds.T - self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) - return self.action_space - - def _set_observation_space(self, observation): - self.observation_space = convert_observation_to_space(observation) - return self.observation_space - - def seed(self, seed=None): - self.np_random, seed = seeding.np_random(seed) - return [seed] - - # methods to override: - # ---------------------------- - - @property - @abstractmethod - def active_obs(self): - """Returns boolean mask for each observation entry - whether the observation is returned for the contextual case or not. - This effectively allows to filter unwanted or unnecessary observations from the full step-based case. - """ - return np.ones(self.observation_space.shape, dtype=bool) - - def _get_obs(self): - """Returns the observation. - """ - raise NotImplementedError() - - def reset_model(self): - """ - Reset the robot degrees of freedom (qpos and qvel). - Implement this in each subclass. - """ - raise NotImplementedError - - def viewer_setup(self): - """ - This method is called when the viewer is initialized. - Optionally implement this method, if you need to tinker with camera position - and so forth. - """ - pass - - # ----------------------------- - - def reset(self): - self.sim.reset() - ob = self.reset_model() - return ob - - def set_state(self, qpos, qvel): - assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) - old_state = self.sim.get_state() - new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel, - old_state.act, old_state.udd_state) - self.sim.set_state(new_state) - self.sim.forward() - - @property - def dt(self): - return self.model.opt.timestep * self.n_substeps - - def do_simulation(self, ctrl): - """ - Additionally returns whether there was an error while stepping the simulation - """ - error_in_sim = False - num_actuations = len(ctrl) - if self.apply_gravity_comp: - self.sim.data.ctrl[:num_actuations] = ctrl + self.sim.data.qfrc_bias[:num_actuations].copy() / self.model.actuator_gear[:, 0] - else: - self.sim.data.ctrl[:num_actuations] = ctrl - - try: - self.sim.step() - except mujoco_py.builder.MujocoException as e: - error_in_sim = True - - return error_in_sim - - def render(self, - mode='human', - width=DEFAULT_SIZE, - height=DEFAULT_SIZE, - camera_id=None, - camera_name=None): - if mode == 'rgb_array' or mode == 'depth_array': - if camera_id is not None and camera_name is not None: - raise ValueError("Both `camera_id` and `camera_name` cannot be" - " specified at the same time.") - - no_camera_specified = camera_name is None and camera_id is None - if no_camera_specified: - camera_name = 'track' - - if camera_id is None and camera_name in self.model._camera_name2id: - camera_id = self.model.camera_name2id(camera_name) - - self._get_viewer(mode).render(width, height, camera_id=camera_id) - - if mode == 'rgb_array': - # window size used for old mujoco-py: - data = self._get_viewer(mode).read_pixels(width, height, depth=False) - # original image is upside-down, so flip it - return data[::-1, :, :] - elif mode == 'depth_array': - self._get_viewer(mode).render(width, height) - # window size used for old mujoco-py: - # Extract depth part of the read_pixels() tuple - data = self._get_viewer(mode).read_pixels(width, height, depth=True)[1] - # original image is upside-down, so flip it - return data[::-1, :] - elif mode == 'human': - self._get_viewer(mode).render() - - def close(self): - if self.viewer is not None: - # self.viewer.finish() - self.viewer = None - self._viewers = {} - - def _get_viewer(self, mode): - self.viewer = self._viewers.get(mode) - if self.viewer is None: - if mode == 'human': - self.viewer = mujoco_py.MjViewer(self.sim) - elif mode == 'rgb_array' or mode == 'depth_array': - self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) - - self.viewer_setup() - self._viewers[mode] = self.viewer - return self.viewer - - def get_body_com(self, body_name): - return self.data.get_body_xpos(body_name) - - def state_vector(self): - return np.concatenate([ - self.sim.data.qpos.flat, - self.sim.data.qvel.flat - ]) diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/ball-in-a-cup_base.xml b/alr_envs/mujoco/ball_in_a_cup/assets/ball-in-a-cup_base.xml deleted file mode 100644 index b534205..0000000 --- a/alr_envs/mujoco/ball_in_a_cup/assets/ball-in-a-cup_base.xml +++ /dev/null @@ -1,366 +0,0 @@ - - - diff --git a/alr_envs/mujoco/gym_table_tennis/envs/MUJOCO_LOG.TXT b/alr_envs/mujoco/gym_table_tennis/envs/MUJOCO_LOG.TXT deleted file mode 100644 index 91c2162..0000000 --- a/alr_envs/mujoco/gym_table_tennis/envs/MUJOCO_LOG.TXT +++ /dev/null @@ -1,3 +0,0 @@ -Mon Jan 25 15:45:30 2021 -ERROR: GLEW initalization error: Missing GL version - diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_convex.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_convex.stl deleted file mode 100644 index 3b05c27..0000000 Binary files a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_convex.stl and /dev/null differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl deleted file mode 100644 index 7b2f001..0000000 Binary files a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_fine.stl b/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_fine.stl deleted file mode 100644 index 0a986fa..0000000 Binary files a/alr_envs/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_fine.stl and /dev/null differ diff --git a/alr_envs/mujoco/meshes/wam/elbow_link_fine.stl b/alr_envs/mujoco/meshes/wam/elbow_link_fine.stl deleted file mode 100755 index f6a1515..0000000 Binary files a/alr_envs/mujoco/meshes/wam/elbow_link_fine.stl and /dev/null differ diff --git a/alr_envs/mujoco/meshes/wam/wrist_palm_link_fine.stl b/alr_envs/mujoco/meshes/wam/wrist_palm_link_fine.stl deleted file mode 100755 index f6b41ad..0000000 Binary files a/alr_envs/mujoco/meshes/wam/wrist_palm_link_fine.stl and /dev/null differ diff --git a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl deleted file mode 100755 index c36f88f..0000000 Binary files a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl deleted file mode 100755 index d00cac1..0000000 Binary files a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/mujoco/meshes/wam/wrist_pitch_link_fine.stl deleted file mode 100755 index 13d2f73..0000000 Binary files a/alr_envs/mujoco/meshes/wam/wrist_pitch_link_fine.stl and /dev/null differ diff --git a/alr_envs/mujoco/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/mujoco/meshes/wam/wrist_yaw_link_fine.stl deleted file mode 100755 index 0d95239..0000000 Binary files a/alr_envs/mujoco/meshes/wam/wrist_yaw_link_fine.stl and /dev/null differ diff --git a/alr_envs/open_ai/README.MD b/alr_envs/open_ai/README.MD new file mode 100644 index 0000000..985c093 --- /dev/null +++ b/alr_envs/open_ai/README.MD @@ -0,0 +1,14 @@ +# OpenAI Gym Wrappers + +These are the Environment Wrappers for selected [OpenAI Gym](https://gym.openai.com/) environments to use +the Motion Primitive gym interface for them. + +## MP Environments +These environments are wrapped-versions of their OpenAI-gym counterparts. + +|Name| Description|Trajectory Horizon|Action Dimension|Context Dimension +|---|---|---|---|---| +|`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 +|`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2 +|`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 +|`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 diff --git a/alr_envs/open_ai/__init__.py b/alr_envs/open_ai/__init__.py new file mode 100644 index 0000000..51dd712 --- /dev/null +++ b/alr_envs/open_ai/__init__.py @@ -0,0 +1,161 @@ +from gym import register +from gym.wrappers import FlattenObservation + +from . import classic_control, mujoco, robotics + +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "DetPMP": []} + +# Short Continuous Mountain Car +register( + id="MountainCarContinuous-v1", + entry_point="gym.envs.classic_control:Continuous_MountainCarEnv", + max_episode_steps=100, + reward_threshold=90.0, +) + +# Open AI +# Classic Control +register( + id='ContinuousMountainCarDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "alr_envs:MountainCarContinuous-v1", + "wrappers": [classic_control.continuous_mountain_car.MPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 4, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": 1., + "d_gains": 1. + } + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ContinuousMountainCarDetPMP-v1") + +register( + id='ContinuousMountainCarDetPMP-v0', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.classic_control:MountainCarContinuous-v0", + "wrappers": [classic_control.continuous_mountain_car.MPWrapper], + "mp_kwargs": { + "num_dof": 1, + "num_basis": 4, + "duration": 19.98, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": 1., + "d_gains": 1. + } + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ContinuousMountainCarDetPMP-v0") + +register( + id='ReacherDetPMP-v2', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.mujoco:Reacher-v2", + "wrappers": [mujoco.reacher_v2.MPWrapper], + "mp_kwargs": { + "num_dof": 2, + "num_basis": 6, + "duration": 1, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "motor", + "policy_kwargs": { + "p_gains": .6, + "d_gains": .075 + } + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("ReacherDetPMP-v2") + +register( + id='FetchSlideDenseDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchSlideDense-v1", + "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "position" + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchSlideDenseDetPMP-v1") + +register( + id='FetchSlideDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchSlide-v1", + "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "position" + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchSlideDetPMP-v1") + +register( + id='FetchReachDenseDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchReachDense-v1", + "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "position" + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchReachDenseDetPMP-v1") + +register( + id='FetchReachDetPMP-v1', + entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', + kwargs={ + "name": "gym.envs.robotics:FetchReach-v1", + "wrappers": [FlattenObservation, robotics.fetch.MPWrapper], + "mp_kwargs": { + "num_dof": 4, + "num_basis": 5, + "duration": 2, + "post_traj_time": 0, + "width": 0.02, + "zero_start": True, + "policy_type": "position" + } + } +) +ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"].append("FetchReachDetPMP-v1") diff --git a/alr_envs/open_ai/classic_control/__init__.py b/alr_envs/open_ai/classic_control/__init__.py new file mode 100644 index 0000000..b998494 --- /dev/null +++ b/alr_envs/open_ai/classic_control/__init__.py @@ -0,0 +1 @@ +from . import continuous_mountain_car \ No newline at end of file diff --git a/alr_envs/open_ai/classic_control/continuous_mountain_car/__init__.py b/alr_envs/open_ai/classic_control/continuous_mountain_car/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/open_ai/classic_control/continuous_mountain_car/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py new file mode 100644 index 0000000..2a2357a --- /dev/null +++ b/alr_envs/open_ai/classic_control/continuous_mountain_car/mp_wrapper.py @@ -0,0 +1,22 @@ +from typing import Union + +import numpy as np +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[1]]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return np.array([self.state[0]]) + + @property + def goal_pos(self): + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return 0.02 \ No newline at end of file diff --git a/alr_envs/open_ai/mujoco/__init__.py b/alr_envs/open_ai/mujoco/__init__.py new file mode 100644 index 0000000..3082d19 --- /dev/null +++ b/alr_envs/open_ai/mujoco/__init__.py @@ -0,0 +1 @@ +from . import reacher_v2 diff --git a/alr_envs/open_ai/mujoco/reacher_v2/__init__.py b/alr_envs/open_ai/mujoco/reacher_v2/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/open_ai/mujoco/reacher_v2/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py new file mode 100644 index 0000000..16202e5 --- /dev/null +++ b/alr_envs/open_ai/mujoco/reacher_v2/mp_wrapper.py @@ -0,0 +1,19 @@ +from typing import Union + +import numpy as np +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qvel[:2] + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + return self.sim.data.qpos[:2] + + @property + def dt(self) -> Union[float, int]: + return self.env.dt \ No newline at end of file diff --git a/alr_envs/open_ai/robotics/__init__.py b/alr_envs/open_ai/robotics/__init__.py new file mode 100644 index 0000000..13c65b3 --- /dev/null +++ b/alr_envs/open_ai/robotics/__init__.py @@ -0,0 +1 @@ +from . import fetch \ No newline at end of file diff --git a/alr_envs/open_ai/robotics/fetch/__init__.py b/alr_envs/open_ai/robotics/fetch/__init__.py new file mode 100644 index 0000000..989b5a9 --- /dev/null +++ b/alr_envs/open_ai/robotics/fetch/__init__.py @@ -0,0 +1 @@ +from .mp_wrapper import MPWrapper \ No newline at end of file diff --git a/alr_envs/open_ai/robotics/fetch/mp_wrapper.py b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py new file mode 100644 index 0000000..218e175 --- /dev/null +++ b/alr_envs/open_ai/robotics/fetch/mp_wrapper.py @@ -0,0 +1,49 @@ +from typing import Union + +import numpy as np + +from mp_env_api import MPEnvWrapper + + +class MPWrapper(MPEnvWrapper): + + @property + def active_obs(self): + return np.hstack([ + [False] * 3, # achieved goal + [True] * 3, # desired/true goal + [False] * 3, # grip pos + [True, True, False] * int(self.has_object), # object position + [True, True, False] * int(self.has_object), # object relative position + [False] * 2, # gripper state + [False] * 3 * int(self.has_object), # object rotation + [False] * 3 * int(self.has_object), # object velocity position + [False] * 3 * int(self.has_object), # object velocity rotation + [False] * 3, # grip velocity position + [False] * 2, # gripper velocity + ]).astype(bool) + + @property + def current_vel(self) -> Union[float, int, np.ndarray]: + dt = self.sim.nsubsteps * self.sim.model.opt.timestep + grip_velp = self.sim.data.get_site_xvelp("robot0:grip") * dt + # gripper state should be symmetric for left and right. + # They are controlled with only one action for both gripper joints + gripper_state = self.sim.data.get_joint_qvel('robot0:r_gripper_finger_joint') * dt + return np.hstack([grip_velp, gripper_state]) + + @property + def current_pos(self) -> Union[float, int, np.ndarray]: + grip_pos = self.sim.data.get_site_xpos("robot0:grip") + # gripper state should be symmetric for left and right. + # They are controlled with only one action for both gripper joints + gripper_state = self.sim.data.get_joint_qpos('robot0:r_gripper_finger_joint') + return np.hstack([grip_pos, gripper_state]) + + @property + def goal_pos(self): + raise ValueError("Goal position is not available and has to be learnt based on the environment.") + + @property + def dt(self) -> Union[float, int]: + return self.env.dt diff --git a/alr_envs/stochastic_search/__init__.py b/alr_envs/stochastic_search/__init__.py deleted file mode 100644 index 257680f..0000000 --- a/alr_envs/stochastic_search/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from alr_envs.stochastic_search.stochastic_search import StochasticSearchEnv diff --git a/alr_envs/stochastic_search/functions/f_base.py b/alr_envs/stochastic_search/functions/f_base.py deleted file mode 100644 index 31b4323..0000000 --- a/alr_envs/stochastic_search/functions/f_base.py +++ /dev/null @@ -1,76 +0,0 @@ -import numpy as np -import scipy.stats as scistats - -np.seterr(divide='ignore', invalid='ignore') - - -class BaseObjective(object): - def __init__(self, dim, int_opt=None, val_opt=None, alpha=None, beta=None): - self.dim = dim - self.alpha = alpha - self.beta = beta - # check if optimal parameter is in interval... - if int_opt is not None: - self.x_opt = np.random.uniform(int_opt[0], int_opt[1], size=(1, dim)) - # ... or based on a single value - elif val_opt is not None: - self.one_pm = np.where(np.random.rand(1, dim) > 0.5, 1, -1) - self.x_opt = val_opt * self.one_pm - else: - raise ValueError("Optimal value or interval has to be defined") - self.f_opt = np.round(np.clip(scistats.cauchy.rvs(loc=0, scale=100, size=1)[0], -1000, 1000), decimals=2) - self.i = np.arange(self.dim) - self._lambda_alpha = None - self._q = None - self._r = None - - def __call__(self, x): - return self.evaluate_full(x) - - def evaluate_full(self, x): - raise NotImplementedError("Subclasses should implement this!") - - def gs(self): - # Gram Schmidt ortho-normalization - a = np.random.randn(self.dim, self.dim) - b, _ = np.linalg.qr(a) - return b - - # TODO: property probably unnecessary - @property - def q(self): - if self._q is None: - self._q = self.gs() - return self._q - - @property - def r(self): - if self._r is None: - self._r = self.gs() - return self._r - - @property - def lambda_alpha(self): - if self._lambda_alpha is None: - if isinstance(self.alpha, int): - lambda_ii = np.power(self.alpha, 1 / 2 * self.i / (self.dim - 1)) - self._lambda_alpha = np.diag(lambda_ii) - else: - lambda_ii = np.power(self.alpha[:, None], 1 / 2 * self.i[None, :] / (self.dim - 1)) - self._lambda_alpha = np.stack([np.diag(l_ii) for l_ii in lambda_ii]) - return self._lambda_alpha - - @staticmethod - def f_pen(x): - return np.sum(np.maximum(0, np.abs(x) - 5), axis=1) - - def t_asy_beta(self, x): - # exp = np.power(x, 1 + self.beta * self.i[:, None] / (self.input_dim - 1) * np.sqrt(x)) - # return np.where(x > 0, exp, x) - return x - - def t_osz(self, x): - x_hat = np.where(x != 0, np.log(np.abs(x)), 0) - c_1 = np.where(x > 0, 10, 5.5) - c_2 = np.where(x > 0, 7.9, 3.1) - return np.sign(x) * np.exp(x_hat + 0.049 * (np.sin(c_1 * x_hat) + np.sin(c_2 * x_hat))) diff --git a/alr_envs/stochastic_search/functions/f_rosenbrock.py b/alr_envs/stochastic_search/functions/f_rosenbrock.py deleted file mode 100644 index a0f6bc4..0000000 --- a/alr_envs/stochastic_search/functions/f_rosenbrock.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np - -from alr_envs.stochastic_search.functions.f_base import BaseObjective - - -class Rosenbrock(BaseObjective): - def __init__(self, dim, int_opt=(-3., 3.)): - super(Rosenbrock, self).__init__(dim, int_opt=int_opt) - self.c = np.maximum(1, np.sqrt(self.dim) / 8) - - def evaluate_full(self, x): - x = np.atleast_2d(x) - assert x.shape[1] == self.dim - - z = self.c * (x - self.x_opt) + 1 - z_end = z[:, 1:] - z_begin = z[:, :-1] - - a = z_begin ** 2 - z_end - b = z_begin - 1 - - return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt - - -class RosenbrockRotated(BaseObjective): - def __init__(self, dim, int_opt=(-3., 3.)): - super(RosenbrockRotated, self).__init__(dim, int_opt=int_opt) - self.c = np.maximum(1, np.sqrt(self.dim) / 8) - - def evaluate_full(self, x): - x = np.atleast_2d(x) - assert x.shape[1] == self.dim - - z = (self.c * self.r @ x.T + 1 / 2).T - a = z[:, :-1] ** 2 - z[:, 1:] - b = z[:, :-1] - 1 - - return np.sum(100 * a ** 2 + b ** 2, axis=1) + self.f_opt - - -class RosenbrockRaw(BaseObjective): - def __init__(self, dim, int_opt=(-3., 3.)): - super(RosenbrockRaw, self).__init__(dim, int_opt=int_opt) - self.x_opt = np.ones((1, dim)) - self.f_opt = 0 - - def evaluate_full(self, x): - x = np.atleast_2d(x) - assert x.shape[1] == self.dim - - a = x[:, :-1] ** 2 - x[:, 1:] - b = x[:, :-1] - 1 - - out = np.sum(100 * a ** 2 + b ** 2, axis=1) - - return out diff --git a/alr_envs/stochastic_search/stochastic_search.py b/alr_envs/stochastic_search/stochastic_search.py deleted file mode 100644 index fd9af8d..0000000 --- a/alr_envs/stochastic_search/stochastic_search.py +++ /dev/null @@ -1,22 +0,0 @@ -import gym -import numpy as np - -from alr_envs.stochastic_search.functions.f_base import BaseObjective - - -class StochasticSearchEnv(gym.Env): - - def __init__(self, cost_f: BaseObjective): - self.cost_f = cost_f - - self.action_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(self.cost_f.dim,), dtype=np.float64) - self.observation_space = gym.spaces.Box(low=(), high=(), shape=(), dtype=np.float64) - - def step(self, action): - return np.zeros(self.observation_space.shape), np.squeeze(-self.cost_f(action)), True, {} - - def reset(self): - return np.zeros(self.observation_space.shape) - - def render(self, mode='human'): - pass diff --git a/alr_envs/utils/__init__.py b/alr_envs/utils/__init__.py index e69de29..bb4b0bc 100644 --- a/alr_envs/utils/__init__.py +++ b/alr_envs/utils/__init__.py @@ -0,0 +1,66 @@ +import re +from typing import Union + +import gym +from gym.envs.registration import register + +from alr_envs.utils.make_env_helpers import make + + +def make_dmc( + id: str, + seed: int = 1, + visualize_reward: bool = True, + from_pixels: bool = False, + height: int = 84, + width: int = 84, + camera_id: int = 0, + frame_skip: int = 1, + episode_length: Union[None, int] = None, + environment_kwargs: dict = {}, + time_limit: Union[None, float] = None, + channels_first: bool = True +): + # Adopted from: https://github.com/denisyarats/dmc2gym/blob/master/dmc2gym/__init__.py + # License: MIT + # Copyright (c) 2020 Denis Yarats + + assert re.match(r"\w+-\w+", id), "env_id does not have the following structure: 'domain_name-task_name'" + domain_name, task_name = id.split("-") + + env_id = f'dmc_{domain_name}_{task_name}_{seed}-v1' + + if from_pixels: + assert not visualize_reward, 'cannot use visualize reward when learning from pixels' + + # shorten episode length + if episode_length is None: + # Default lengths for benchmarking suite is 1000 and for manipulation tasks 250 + episode_length = 250 if domain_name == "manipulation" else 1000 + + max_episode_steps = (episode_length + frame_skip - 1) // frame_skip + if env_id not in gym.envs.registry.env_specs: + task_kwargs = {'random': seed} + # if seed is not None: + # task_kwargs['random'] = seed + if time_limit is not None: + task_kwargs['time_limit'] = time_limit + register( + id=env_id, + entry_point='alr_envs.dmc.dmc_wrapper:DMCWrapper', + kwargs=dict( + domain_name=domain_name, + task_name=task_name, + task_kwargs=task_kwargs, + environment_kwargs=environment_kwargs, + visualize_reward=visualize_reward, + from_pixels=from_pixels, + height=height, + width=width, + camera_id=camera_id, + frame_skip=frame_skip, + channels_first=channels_first, + ), + max_episode_steps=max_episode_steps, + ) + return gym.make(env_id) diff --git a/alr_envs/utils/make_env_helpers.py b/alr_envs/utils/make_env_helpers.py index d455496..fc73b05 100644 --- a/alr_envs/utils/make_env_helpers.py +++ b/alr_envs/utils/make_env_helpers.py @@ -1,29 +1,218 @@ -from alr_envs.utils.mps.dmp_wrapper import DmpWrapper -from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper +from typing import Iterable, Type, Union + import gym -from gym.vector.utils import write_to_shared_memory -import sys +import numpy as np +from gym.envs.registration import EnvSpec + +from mp_env_api import MPEnvWrapper +from mp_env_api.mp_wrappers.detpmp_wrapper import DetPMPWrapper +from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper -def make_env(env_id, seed, rank): - env = gym.make(env_id) - env.seed(seed + rank) - return lambda: env +def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs): + """ + TODO: Do we need this? + Generate a callable to create a new gym environment with a given seed. + The rank is added to the seed and can be used for example when using vector environments. + E.g. [make_rank("my_env_name-v0", 123, i) for i in range(8)] creates a list of 8 environments + with seeds 123 through 130. + Hence, testing environments should be seeded with a value which is offset by the number of training environments. + Here e.g. [make_rank("my_env_name-v0", 123 + 8, i) for i in range(5)] for 5 testing environmetns + + Args: + env_id: name of the environment + seed: seed for deterministic behaviour + rank: environment rank for deterministic over multiple seeds behaviour + return_callable: If True returns a callable to create the environment instead of the environment itself. + + Returns: + + """ + + def f(): + return make(env_id, seed + rank, **kwargs) + + return f if return_callable else f() -def make_contextual_env(env_id, context, seed, rank): - env = gym.make(env_id, context=context) - env.seed(seed + rank) - return lambda: env +def make(env_id: str, seed, **kwargs): + """ + Converts an env_id to an environment with the gym API. + This also works for DeepMind Control Suite interface_wrappers + for which domain name and task name are expected to be separated by "-". + Args: + env_id: gym name or env_id of the form "domain_name-task_name" for DMC tasks + **kwargs: Additional kwargs for the constructor such as pixel observations, etc. + + Returns: Gym environment + + """ + try: + # Add seed to kwargs in case it is a predefined gym+dmc hybrid environment. + if env_id.startswith("dmc"): + kwargs.update({"seed": seed}) + + # Gym + env = gym.make(env_id, **kwargs) + env.seed(seed) + env.action_space.seed(seed) + env.observation_space.seed(seed) + except gym.error.Error: + + # MetaWorld env + import metaworld + if env_id in metaworld.ML1.ENV_NAMES: + env = metaworld.envs.ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id + "-goal-observable"](seed=seed, **kwargs) + # setting this avoids generating the same initialization after each reset + env._freeze_rand_vec = False + # Manually set spec, as metaworld environments are not registered via gym + env.unwrapped.spec = EnvSpec(env_id) + # Set Timelimit based on the maximum allowed path length of the environment + env = gym.wrappers.TimeLimit(env, max_episode_steps=env.max_path_length) + env.seed(seed) + env.action_space.seed(seed) + env.observation_space.seed(seed) + env.goal_space.seed(seed) + + else: + # DMC + from alr_envs import make_dmc + env = make_dmc(env_id, seed=seed, **kwargs) + + assert env.base_step_limit == env.spec.max_episode_steps, \ + f"The specified 'episode_length' of {env.spec.max_episode_steps} steps for gym is different from " \ + f"the DMC environment specification of {env.base_step_limit} steps." + + return env -def make_dmp_env(**kwargs): - name = kwargs.pop("name") - _env = gym.make(name) - return DmpWrapper(_env, **kwargs) +def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs): + """ + Helper function for creating a wrapped gym environment using MPs. + It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is + provided to expose the interface for MPs. + + Args: + env_id: name of the environment + wrappers: list of wrappers (at least an MPEnvWrapper), + seed: seed of environment + + Returns: gym environment with all specified wrappers applied + + """ + # _env = gym.make(env_id) + _env = make(env_id, seed, **kwargs) + + assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \ + "At least one MPEnvWrapper is required in order to leverage motion primitive environments." + for w in wrappers: + _env = w(_env) + + return _env -def make_detpmp_env(**kwargs): - name = kwargs.pop("name") - _env = gym.make(name) - return DetPMPWrapper(_env, **kwargs) +def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): + """ + This can also be used standalone for manually building a custom DMP environment. + Args: + env_id: base_env_name, + wrappers: list of wrappers (at least an MPEnvWrapper), + seed: seed of environment + mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP + + Returns: DMP wrapped gym env + + """ + _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) + + _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + + _verify_dof(_env, mp_kwargs.get("num_dof")) + + return DmpWrapper(_env, **mp_kwargs) + + +def make_detpmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs): + """ + This can also be used standalone for manually building a custom Det ProMP environment. + Args: + env_id: base_env_name, + wrappers: list of wrappers (at least an MPEnvWrapper), + mp_kwargs: dict of at least {num_dof: int, num_basis: int, width: int} + + Returns: DMP wrapped gym env + + """ + _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None)) + + _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, seed=seed, **kwargs) + + _verify_dof(_env, mp_kwargs.get("num_dof")) + + return DetPMPWrapper(_env, **mp_kwargs) + + +def make_dmp_env_helper(**kwargs): + """ + Helper function for registering a DMP gym environments. + Args: + **kwargs: expects at least the following: + { + "name": base_env_name, + "wrappers": list of wrappers (at least an MPEnvWrapper), + "mp_kwargs": dict of at least {num_dof: int, num_basis: int} for DMP + } + + Returns: DMP wrapped gym env + + """ + seed = kwargs.pop("seed", None) + return make_dmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed, + mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) + + +def make_detpmp_env_helper(**kwargs): + """ + Helper function for registering ProMP gym environments. + This can also be used standalone for manually building a custom ProMP environment. + Args: + **kwargs: expects at least the following: + { + "name": base_env_name, + "wrappers": list of wrappers (at least an MPEnvWrapper), + "mp_kwargs": dict of at least {num_dof: int, num_basis: int, width: int} + } + + Returns: DMP wrapped gym env + + """ + seed = kwargs.pop("seed", None) + return make_detpmp_env(env_id=kwargs.pop("name"), wrappers=kwargs.pop("wrappers"), seed=seed, + mp_kwargs=kwargs.pop("mp_kwargs"), **kwargs) + + +def _verify_time_limit(mp_time_limit: Union[None, float], env_time_limit: Union[None, float]): + """ + When using DMC check if a manually specified time limit matches the trajectory duration the MP receives. + Mostly, the time_limit for DMC is not specified and the default values from DMC are taken. + This check, however, can only been done after instantiating the environment. + It can be found in the BaseMP class. + + Args: + mp_time_limit: max trajectory length of mp in seconds + env_time_limit: max trajectory length of DMC environment in seconds + + Returns: + + """ + if mp_time_limit is not None and env_time_limit is not None: + assert mp_time_limit == env_time_limit, \ + f"The specified 'time_limit' of {env_time_limit}s does not match " \ + f"the duration of {mp_time_limit}s for the MP." + + +def _verify_dof(base_env: gym.Env, dof: int): + action_shape = np.prod(base_env.action_space.shape) + assert dof == action_shape, \ + f"The specified degrees of freedom ('num_dof') {dof} do not match " \ + f"the action space of {action_shape} the base environments" diff --git a/alr_envs/utils/mp_env_async_sampler.py b/alr_envs/utils/mp_env_async_sampler.py deleted file mode 100644 index 5cda41f..0000000 --- a/alr_envs/utils/mp_env_async_sampler.py +++ /dev/null @@ -1,124 +0,0 @@ -import gym -from gym.vector.async_vector_env import AsyncVectorEnv -import numpy as np -from _collections import defaultdict - - -def make_env(env_id, rank, seed=0, **env_kwargs): - env = gym.make(env_id, **env_kwargs) - env.seed(seed + rank) - return lambda: env - - -def split_array(ary, size): - n_samples = len(ary) - if n_samples < size: - tmp = np.zeros((size, ary.shape[1])) - tmp[0:n_samples] = ary - return [tmp] - elif n_samples == size: - return [ary] - else: - repeat = int(np.ceil(n_samples / size)) - split = [k * size for k in range(1, repeat)] - sub_arys = np.split(ary, split) - - if n_samples % size != 0: - tmp = np.zeros_like(sub_arys[0]) - last = sub_arys[-1] - tmp[0: len(last)] = last - sub_arys[-1] = tmp - - return sub_arys - - -def _flatten_list(l): - assert isinstance(l, (list, tuple)) - assert len(l) > 0 - assert all([len(l_) > 0 for l_ in l]) - - return [l__ for l_ in l for l__ in l_] - - -class DummyDist: - def __init__(self, dim): - self.dim = dim - - def sample(self, contexts): - contexts = np.atleast_2d(contexts) - n_samples = contexts.shape[0] - return np.random.normal(size=(n_samples, self.dim)), contexts - - -class AlrMpEnvSampler: - """ - An asynchronous sampler for non contextual MPWrapper environments. A sampler object can be called with a set of - parameters and returns the corresponding final obs, rewards, dones and info dicts. - """ - def __init__(self, env_id, num_envs, seed=0, **env_kwargs): - self.num_envs = num_envs - self.env = AsyncVectorEnv([make_env(env_id, seed, i, **env_kwargs) for i in range(num_envs)]) - - def __call__(self, params): - params = np.atleast_2d(params) - n_samples = params.shape[0] - split_params = split_array(params, self.num_envs) - - vals = defaultdict(list) - for p in split_params: - self.env.reset() - obs, reward, done, info = self.env.step(p) - vals['obs'].append(obs) - vals['reward'].append(reward) - vals['done'].append(done) - vals['info'].append(info) - - # do not return values above threshold - return np.vstack(vals['obs'])[:n_samples], np.hstack(vals['reward'])[:n_samples],\ - _flatten_list(vals['done'])[:n_samples], _flatten_list(vals['info'])[:n_samples] - - -class AlrContextualMpEnvSampler: - """ - An asynchronous sampler for contextual MPWrapper environments. A sampler object can be called with a set of - parameters and returns the corresponding final obs, rewards, dones and info dicts. - """ - def __init__(self, env_id, num_envs, seed=0, **env_kwargs): - self.num_envs = num_envs - self.env = AsyncVectorEnv([make_env(env_id, seed, i, **env_kwargs) for i in range(num_envs)]) - - def __call__(self, dist, n_samples): - - repeat = int(np.ceil(n_samples / self.env.num_envs)) - vals = defaultdict(list) - for i in range(repeat): - new_contexts = self.env.reset() - vals['new_contexts'].append(new_contexts) - new_samples, new_contexts = dist.sample(new_contexts) - vals['new_samples'].append(new_samples) - - obs, reward, done, info = self.env.step(new_samples) - vals['obs'].append(obs) - vals['reward'].append(reward) - vals['done'].append(done) - vals['info'].append(info) - - # do not return values above threshold - return np.vstack(vals['new_samples'])[:n_samples], np.vstack(vals['new_contexts'])[:n_samples], \ - np.vstack(vals['obs'])[:n_samples], np.hstack(vals['reward'])[:n_samples], \ - _flatten_list(vals['done'])[:n_samples], _flatten_list(vals['info'])[:n_samples] - - -if __name__ == "__main__": - env_name = "alr_envs:HoleReacherDetPMP-v1" - n_cpu = 8 - dim = 25 - n_samples = 10 - - sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu) - - thetas = np.random.randn(n_samples, dim) # usually form a search distribution - - _, rewards, __, ___ = sampler(thetas) - - print(rewards) diff --git a/alr_envs/utils/mps/detpmp_wrapper.py b/alr_envs/utils/mps/detpmp_wrapper.py deleted file mode 100644 index 8661781..0000000 --- a/alr_envs/utils/mps/detpmp_wrapper.py +++ /dev/null @@ -1,42 +0,0 @@ -import gym -import numpy as np -from mp_lib import det_promp - -from alr_envs.utils.mps.mp_environments import AlrEnv -from alr_envs.utils.mps.mp_wrapper import MPWrapper - - -class DetPMPWrapper(MPWrapper): - def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01, - post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1., - zero_start: bool = False, zero_goal: bool = False, **mp_kwargs): - self.duration = duration # seconds - - dt = env.dt if hasattr(env, "dt") else dt - assert dt is not None - self.dt = dt - - super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis, - width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs) - - action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof)) - self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32) - - def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None, - off: float = 0.01, zero_start: bool = False, zero_goal: bool = False): - pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off, - zero_start=zero_start, zero_goal=zero_goal) - - weights = np.zeros(shape=(num_basis, num_dof)) - pmp.set_weights(duration, weights) - - return pmp - - def mp_rollout(self, action): - params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale - self.mp.set_weights(self.duration, params) - _, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.) - if self.mp.zero_start: - des_pos += self.env.start_pos[None, :] - - return des_pos, des_vel diff --git a/alr_envs/utils/mps/dmp_wrapper.py b/alr_envs/utils/mps/dmp_wrapper.py deleted file mode 100644 index 4958a98..0000000 --- a/alr_envs/utils/mps/dmp_wrapper.py +++ /dev/null @@ -1,76 +0,0 @@ -import gym -import numpy as np -from mp_lib import dmps -from mp_lib.basis import DMPBasisGenerator -from mp_lib.phase import ExpDecayPhaseGenerator - -from alr_envs.utils.mps.mp_environments import AlrEnv -from alr_envs.utils.mps.mp_wrapper import MPWrapper - - -class DmpWrapper(MPWrapper): - - def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, - duration: int = 1, alpha_phase: float = 2., dt: float = None, - learn_goal: bool = False, post_traj_time: float = 0., - weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3., - policy_type: str = None, render_mode: str = None): - - """ - This Wrapper generates a trajectory based on a DMP and will only return episodic performances. - Args: - env: - num_dof: - num_basis: - duration: - alpha_phase: - dt: - learn_goal: - post_traj_time: - policy_type: - weights_scale: - goal_scale: - """ - self.learn_goal = learn_goal - dt = env.dt if hasattr(env, "dt") else dt - assert dt is not None - self.t = np.linspace(0, duration, int(duration / dt)) - self.goal_scale = goal_scale - - super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode, - num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor) - - action_bounds = np.inf * np.ones((np.prod(self.mp.weights.shape) + (num_dof if learn_goal else 0))) - self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32) - - def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2., - bandwidth_factor: int = 3): - - phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration) - basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=num_basis, - basis_bandwidth_factor=bandwidth_factor) - - dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator, - duration=duration, dt=dt) - - return dmp - - def goal_and_weights(self, params): - assert params.shape[-1] == self.action_space.shape[0] - params = np.atleast_2d(params) - - if self.learn_goal: - goal_pos = params[0, -self.mp.num_dimensions:] # [num_dof] - params = params[:, :-self.mp.num_dimensions] # [1,num_dof] - else: - goal_pos = self.env.goal_pos - assert goal_pos is not None - - weight_matrix = np.reshape(params, self.mp.weights.shape) # [num_basis, num_dof] - return goal_pos * self.goal_scale, weight_matrix * self.weights_scale - - def mp_rollout(self, action): - self.mp.dmp_start_pos = self.env.start_pos - goal_pos, weight_matrix = self.goal_and_weights(action) - self.mp.set_weights(weight_matrix, goal_pos) - return self.mp.reference_trajectory(self.t) diff --git a/alr_envs/utils/mps/mp_environments.py b/alr_envs/utils/mps/mp_environments.py deleted file mode 100644 index 60db99b..0000000 --- a/alr_envs/utils/mps/mp_environments.py +++ /dev/null @@ -1,33 +0,0 @@ -from abc import abstractmethod -from typing import Union - -import gym -import numpy as np - - -class AlrEnv(gym.Env): - - @property - @abstractmethod - def active_obs(self): - """Returns boolean mask for each observation entry - whether the observation is returned for the contextual case or not. - This effectively allows to filter unwanted or unnecessary observations from the full step-based case. - """ - return np.ones(self.observation_space.shape, dtype=bool) - - @property - @abstractmethod - def start_pos(self) -> Union[float, int, np.ndarray]: - """ - Returns the starting position of the joints - """ - raise NotImplementedError() - - @property - def goal_pos(self) -> Union[float, int, np.ndarray]: - """ - Returns the current final position of the joints for the MP. - By default this returns the starting position. - """ - return self.start_pos diff --git a/alr_envs/utils/mps/mp_wrapper.py b/alr_envs/utils/mps/mp_wrapper.py deleted file mode 100644 index fc9e0bb..0000000 --- a/alr_envs/utils/mps/mp_wrapper.py +++ /dev/null @@ -1,113 +0,0 @@ -from abc import ABC, abstractmethod - -import gym -import numpy as np - -from alr_envs.utils.mps.mp_environments import AlrEnv -from alr_envs.utils.policies import get_policy_class - - -class MPWrapper(gym.Wrapper, ABC): - - def __init__(self, env: AlrEnv, num_dof: int, dt: float, duration: float = 1, post_traj_time: float = 0., - policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs): - super().__init__(env) - - # adjust observation space to reduce version - obs_sp = self.env.observation_space - self.observation_space = gym.spaces.Box(low=obs_sp.low[self.env.active_obs], - high=obs_sp.high[self.env.active_obs], - dtype=obs_sp.dtype) - - assert dt is not None # this should never happen as MPWrapper is a base class - self.post_traj_steps = int(post_traj_time / dt) - - self.mp = self.initialize_mp(num_dof, duration, dt, **mp_kwargs) - self.weights_scale = weights_scale - - policy_class = get_policy_class(policy_type) - self.policy = policy_class(env) - - # rendering - self.render_mode = render_mode - self.render_kwargs = {} - - # TODO: @Max I think this should not be in this class, this functionality should be part of your sampler. - def __call__(self, params, contexts=None): - """ - Can be used to provide a batch of parameter sets - """ - params = np.atleast_2d(params) - obs = [] - rewards = [] - dones = [] - infos = [] - # for p, c in zip(params, contexts): - for p in params: - # self.configure(c) - ob, reward, done, info = self.step(p) - obs.append(ob) - rewards.append(reward) - dones.append(done) - infos.append(info) - - return obs, np.array(rewards), dones, infos - - def reset(self): - return self.env.reset()[self.env.active_obs] - - def step(self, action: np.ndarray): - """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step""" - trajectory, velocity = self.mp_rollout(action) - - if self.post_traj_steps > 0: - trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])]) - velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))]) - - # self._trajectory = trajectory - # self._velocity = velocity - - rewards = 0 - info = {} - # create random obs as the reset function is called externally - obs = self.env.observation_space.sample() - - for t, pos_vel in enumerate(zip(trajectory, velocity)): - ac = self.policy.get_action(pos_vel[0], pos_vel[1]) - ac = np.clip(ac, self.env.action_space.low, self.env.action_space.high) - obs, rew, done, info = self.env.step(ac) - rewards += rew - # TODO return all dicts? - # [infos[k].append(v) for k, v in info.items()] - if self.render_mode: - self.env.render(mode=self.render_mode, **self.render_kwargs) - if done: - break - - done = True - return obs[self.env.active_obs], rewards, done, info - - def render(self, mode='human', **kwargs): - """Only set render options here, such that they can be used during the rollout. - This only needs to be called once""" - self.render_mode = mode - self.render_kwargs = kwargs - - @abstractmethod - def mp_rollout(self, action): - """ - Generate trajectory and velocity based on the MP - Returns: - trajectory/positions, velocity - """ - raise NotImplementedError() - - @abstractmethod - def initialize_mp(self, num_dof: int, duration: float, dt: float, **kwargs): - """ - Create respective instance of MP - Returns: - MP instance - """ - - raise NotImplementedError diff --git a/alr_envs/utils/policies.py b/alr_envs/utils/policies.py deleted file mode 100644 index e8874d6..0000000 --- a/alr_envs/utils/policies.py +++ /dev/null @@ -1,48 +0,0 @@ -from gym import Env - -from alr_envs.mujoco.alr_mujoco_env import AlrMujocoEnv - - -class BaseController: - def __init__(self, env: Env): - self.env = env - - def get_action(self, des_pos, des_vel): - raise NotImplementedError - - -class PosController(BaseController): - def get_action(self, des_pos, des_vel): - return des_pos - - -class VelController(BaseController): - def get_action(self, des_pos, des_vel): - return des_vel - - -class PDController(BaseController): - def __init__(self, env: AlrMujocoEnv): - self.p_gains = env.p_gains - self.d_gains = env.d_gains - super(PDController, self).__init__(env) - - def get_action(self, des_pos, des_vel): - # TODO: make standardized ALRenv such that all of them have current_pos/vel attributes - cur_pos = self.env.current_pos - cur_vel = self.env.current_vel - if len(des_pos) != len(cur_pos): - des_pos = self.env.extend_des_pos(des_pos) - if len(des_vel) != len(cur_vel): - des_vel = self.env.extend_des_vel(des_vel) - trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel) - return trq - - -def get_policy_class(policy_type): - if policy_type == "motor": - return PDController - elif policy_type == "velocity": - return VelController - elif policy_type == "position": - return PosController diff --git a/alr_envs/utils/utils.py b/alr_envs/utils/utils.py index 89205bd..3354db3 100644 --- a/alr_envs/utils/utils.py +++ b/alr_envs/utils/utils.py @@ -15,8 +15,7 @@ def angle_normalize(x, type="deg"): if type not in ["deg", "rad"]: raise ValueError(f"Invalid type {type}. Choose one of 'deg' or 'rad'.") if type == "deg": - x = np.deg2rad(x) # x * pi / 180 + x = np.deg2rad(x) # x * pi / 180 two_pi = 2 * np.pi return x - two_pi * np.floor((x + np.pi) / two_pi) - diff --git a/example.py b/example.py deleted file mode 100644 index af30138..0000000 --- a/example.py +++ /dev/null @@ -1,115 +0,0 @@ -from collections import defaultdict -import gym -import numpy as np -from alr_envs.utils.mp_env_async_sampler import AlrMpEnvSampler, AlrContextualMpEnvSampler, DummyDist - - -def example_mujoco(): - env = gym.make('alr_envs:ALRReacher-v0') - rewards = 0 - obs = env.reset() - - # number of environment steps - for i in range(10000): - obs, reward, done, info = env.step(env.action_space.sample()) - rewards += reward - - if i % 1 == 0: - env.render() - - if done: - print(rewards) - rewards = 0 - obs = env.reset() - - -def example_mp(env_name="alr_envs:HoleReacherDMP-v0"): - # env = gym.make("alr_envs:ViaPointReacherDMP-v0") - env = gym.make(env_name) - rewards = 0 - # env.render(mode=None) - obs = env.reset() - - # number of samples/full trajectories (multiple environment steps) - for i in range(10): - obs, reward, done, info = env.step(env.action_space.sample()) - rewards += reward - - if i % 1 == 0: - # render full DMP trajectory - # render can only be called once in the beginning as well. That would render every trajectory - # Calling it after every trajectory allows to modify the mode. mode=None, disables rendering. - env.render(mode="human") - - if done: - print(rewards) - rewards = 0 - obs = env.reset() - - -def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D', 16)): - def make_env(env_id, seed, rank): - env = gym.make(env_id) - env.seed(seed + rank) - return lambda: env - - def sample(env: gym.vector.VectorEnv, n_samples=100): - # for plotting - rewards = np.zeros(n_cpu) - - # this would generate more samples than requested if n_samples % num_envs != 0 - repeat = int(np.ceil(n_samples / env.num_envs)) - vals = defaultdict(list) - for i in range(repeat): - obs, reward, done, info = envs.step(envs.action_space.sample()) - vals['obs'].append(obs) - vals['reward'].append(reward) - vals['done'].append(done) - vals['info'].append(info) - rewards += reward - if np.any(done): - print(rewards[done]) - rewards[done] = 0 - - # do not return values above threshold - return (*map(lambda v: np.stack(v)[:n_samples], vals.values()),) - - envs = gym.vector.AsyncVectorEnv([make_env(env_id, seed, i) for i in range(n_cpu)]) - - obs = envs.reset() - print(sample(envs, 16)) - - -def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4): - n_samples = 10 - - sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu) - dim = sampler.env.action_space.spaces[0].shape[0] - - thetas = np.random.randn(n_samples, dim) # usually form a search distribution - - _, rewards, __, ___ = sampler(thetas) - - print(rewards) - - -def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4): - sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu) - dim = sampler.env.action_space.spaces[0].shape[0] - dist = DummyDist(dim) # needs a sample function - - n_samples = 10 - new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples) - - print(new_rewards) - - -if __name__ == '__main__': - # example_mujoco() - # example_dmp("alr_envs:SimpleReacherDMP-v1") - # example_async("alr_envs:LongSimpleReacherDMP-v0", 4) - # example_async_contextual_sampler() - # env = gym.make("alr_envs:HoleReacherDetPMP-v1") - env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0" - # example_async_sampler(env_name) - example_mp(env_name) diff --git a/reacher.egg-info/PKG-INFO b/reacher.egg-info/PKG-INFO deleted file mode 100644 index 9ea9f7e..0000000 --- a/reacher.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: reacher -Version: 0.0.1 -Summary: UNKNOWN -Home-page: UNKNOWN -Author: UNKNOWN -Author-email: UNKNOWN -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN diff --git a/reacher.egg-info/SOURCES.txt b/reacher.egg-info/SOURCES.txt deleted file mode 100644 index b771181..0000000 --- a/reacher.egg-info/SOURCES.txt +++ /dev/null @@ -1,7 +0,0 @@ -README.md -setup.py -reacher.egg-info/PKG-INFO -reacher.egg-info/SOURCES.txt -reacher.egg-info/dependency_links.txt -reacher.egg-info/requires.txt -reacher.egg-info/top_level.txt \ No newline at end of file diff --git a/reacher.egg-info/dependency_links.txt b/reacher.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/reacher.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/reacher.egg-info/requires.txt b/reacher.egg-info/requires.txt deleted file mode 100644 index 1e6c2dd..0000000 --- a/reacher.egg-info/requires.txt +++ /dev/null @@ -1 +0,0 @@ -gym diff --git a/reacher.egg-info/top_level.txt b/reacher.egg-info/top_level.txt deleted file mode 100644 index 8b13789..0000000 --- a/reacher.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/setup.py b/setup.py index 22e1d1c..793864e 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,24 @@ from setuptools import setup -setup(name='alr_envs', - version='0.0.1', - install_requires=['gym', - 'PyQt5', - 'matplotlib', - 'mp_lib @ git+https://git@github.com/maxhuettenrauch/mp_lib@master#egg=mp_lib', - 'mujoco_py'], # And any other dependencies foo needs - ) +setup( + name='alr_envs', + version='0.0.1', + packages=['alr_envs', 'alr_envs.alr', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'], + install_requires=[ + 'gym', + 'PyQt5', + 'matplotlib', + # 'mp_env_api @ git+https://github.com/ALRhub/motion_primitive_env_api.git', + 'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git', + 'mujoco-py<2.1,>=2.0', + 'dm_control', + 'metaworld @ git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld', + ], + + url='https://github.com/ALRhub/alr_envs/', + license='MIT', + author='Fabian Otto, Marcel Sandermann, Maximilian Huettenrauch', + author_email='', + description='Custom Gym environments for various (robotics) tasks. integration of DMC environments into the' + 'gym interface, and support for using motion primitives with gym environments.' +) diff --git a/alr_envs/utils/mps/__init__.py b/test/__init__.py similarity index 100% rename from alr_envs/utils/mps/__init__.py rename to test/__init__.py diff --git a/test/test_dmc_envs.py b/test/test_dmc_envs.py new file mode 100644 index 0000000..a7cd9be --- /dev/null +++ b/test/test_dmc_envs.py @@ -0,0 +1,127 @@ +import unittest + +import gym +import numpy as np + +from dm_control import suite, manipulation + +from alr_envs import make + +DMC_ENVS = [f'{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] +MANIPULATION_SPECS = [f'manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] +SEED = 1 + + +class TestStepDMCEnvironments(unittest.TestCase): + + def _run_env(self, env_id, iterations=None, seed=SEED, render=False): + """ + Example for running a DMC based env in the step based setting. + The env_id has to be specified as `domain_name-task_name` or + for manipulation tasks as `manipulation-environment_name` + + Args: + env_id: Either `domain_name-task_name` or `manipulation-environment_name` + iterations: Number of rollout steps to run + seed= random seeding + render: Render the episode + + Returns: + + """ + env: gym.Env = make(env_id, seed=seed) + rewards = [] + observations = [] + dones = [] + obs = env.reset() + self._verify_observations(obs, env.observation_space, "reset()") + + length = env.spec.max_episode_steps + if iterations is None: + if length is None: + iterations = 1 + else: + iterations = length + + # number of samples(multiple environment steps) + for i in range(iterations): + observations.append(obs) + + ac = env.action_space.sample() + # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) + obs, reward, done, info = env.step(ac) + + self._verify_observations(obs, env.observation_space, "step()") + self._verify_reward(reward) + self._verify_done(done) + + rewards.append(reward) + dones.append(done) + + if render: + env.render("human") + + if done: + obs = env.reset() + + assert done, "Done flag is not True after max episode length." + observations.append(obs) + env.close() + del env + return np.array(observations), np.array(rewards), np.array(dones) + + def _verify_observations(self, obs, observation_space, obs_type="reset()"): + self.assertTrue(observation_space.contains(obs), + f"Observation {obs} received from {obs_type} " + f"not contained in observation space {observation_space}.") + + def _verify_reward(self, reward): + self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") + + def _verify_done(self, done): + self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") + + def test_dmc_functionality(self): + """Tests that environments runs without errors using random actions.""" + for env_id in DMC_ENVS: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_dmc_determinism(self): + """Tests that identical seeds produce identical trajectories.""" + seed = 0 + # Iterate over two trajectories, which should have the same state and action sequence + for env_id in DMC_ENVS: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, obs2, rwd2, done2 = time_step + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + + def test_manipulation_functionality(self): + """Tests that environments runs without errors using random actions.""" + for env_id in MANIPULATION_SPECS: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_manipulation_determinism(self): + """Tests that identical seeds produce identical trajectories.""" + seed = 0 + # Iterate over two trajectories, which should have the same state and action sequence + for env_id in MANIPULATION_SPECS: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, obs2, rwd2, done2 = time_step + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_envs.py b/test/test_envs.py new file mode 100644 index 0000000..f8d7269 --- /dev/null +++ b/test/test_envs.py @@ -0,0 +1,172 @@ +import unittest + +import gym +import numpy as np + +import alr_envs # noqa +from alr_envs.utils.make_env_helpers import make + +ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point) +SEED = 1 + + +class TestMPEnvironments(unittest.TestCase): + + def _run_env(self, env_id, iterations=None, seed=SEED, render=False): + """ + Example for running a DMC based env in the step based setting. + The env_id has to be specified as `domain_name-task_name` or + for manipulation tasks as `manipulation-environment_name` + + Args: + env_id: Either `domain_name-task_name` or `manipulation-environment_name` + iterations: Number of rollout steps to run + seed= random seeding + render: Render the episode + + Returns: + + """ + env: gym.Env = make(env_id, seed=seed) + rewards = [] + observations = [] + dones = [] + obs = env.reset() + self._verify_observations(obs, env.observation_space, "reset()") + + length = env.spec.max_episode_steps + if iterations is None: + if length is None: + iterations = 1 + else: + iterations = length + + # number of samples(multiple environment steps) + for i in range(iterations): + observations.append(obs) + + ac = env.action_space.sample() + # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) + obs, reward, done, info = env.step(ac) + + self._verify_observations(obs, env.observation_space, "step()") + self._verify_reward(reward) + self._verify_done(done) + + rewards.append(reward) + dones.append(done) + + if render: + env.render("human") + + if done: + obs = env.reset() + + assert done, "Done flag is not True after max episode length." + observations.append(obs) + env.close() + del env + return np.array(observations), np.array(rewards), np.array(dones) + + def _run_env_determinism(self, ids): + seed = 0 + for env_id in ids: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, obs2, rwd2, done2 = time_step + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] {obs1} and {obs2} do not match.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + + def _verify_observations(self, obs, observation_space, obs_type="reset()"): + self.assertTrue(observation_space.contains(obs), + f"Observation {obs} received from {obs_type} " + f"not contained in observation space {observation_space}.") + + def _verify_reward(self, reward): + self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") + + def _verify_done(self, done): + self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") + + def test_alr_environment_functionality(self): + """Tests that environments runs without errors using random actions for ALR MP envs.""" + with self.subTest(msg="DMP"): + for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + with self.subTest(msg="DetPMP"): + for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_openai_environment_functionality(self): + """Tests that environments runs without errors using random actions for OpenAI gym MP envs.""" + with self.subTest(msg="DMP"): + for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + with self.subTest(msg="DetPMP"): + for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_dmc_environment_functionality(self): + """Tests that environments runs without errors using random actions for DMC MP envs.""" + with self.subTest(msg="DMP"): + for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + with self.subTest(msg="DetPMP"): + for env_id in alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_metaworld_environment_functionality(self): + """Tests that environments runs without errors using random actions for Metaworld MP envs.""" + with self.subTest(msg="DMP"): + for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + with self.subTest(msg="DetPMP"): + for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['DetPMP']: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_alr_environment_determinism(self): + """Tests that identical seeds produce identical trajectories for ALR MP Envs.""" + with self.subTest(msg="DMP"): + self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) + with self.subTest(msg="DetPMP"): + self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + + def test_openai_environment_determinism(self): + """Tests that identical seeds produce identical trajectories for OpenAI gym MP Envs.""" + with self.subTest(msg="DMP"): + self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) + with self.subTest(msg="DetPMP"): + self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + + def test_dmc_environment_determinism(self): + """Tests that identical seeds produce identical trajectories for DMC MP Envs.""" + with self.subTest(msg="DMP"): + self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) + with self.subTest(msg="DetPMP"): + self._run_env_determinism(alr_envs.ALL_DEEPMIND_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + + def test_metaworld_environment_determinism(self): + """Tests that identical seeds produce identical trajectories for Metaworld MP Envs.""" + with self.subTest(msg="DMP"): + self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) + with self.subTest(msg="DetPMP"): + self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DetPMP"]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_metaworld_envs.py b/test/test_metaworld_envs.py new file mode 100644 index 0000000..a6bd244 --- /dev/null +++ b/test/test_metaworld_envs.py @@ -0,0 +1,107 @@ +import unittest + +import gym +import numpy as np + +from alr_envs import make +from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE + +ALL_ENVS = [env.split("-goal-observable")[0] for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] +SEED = 1 + + +class TestStepMetaWorlEnvironments(unittest.TestCase): + + def _run_env(self, env_id, iterations=None, seed=SEED, render=False): + """ + Example for running a DMC based env in the step based setting. + The env_id has to be specified as `domain_name-task_name` or + for manipulation tasks as `manipulation-environment_name` + + Args: + env_id: Either `domain_name-task_name` or `manipulation-environment_name` + iterations: Number of rollout steps to run + seed= random seeding + render: Render the episode + + Returns: + + """ + env: gym.Env = make(env_id, seed=seed) + rewards = [] + observations = [] + actions = [] + dones = [] + obs = env.reset() + self._verify_observations(obs, env.observation_space, "reset()") + + length = env.max_path_length + if iterations is None: + if length is None: + iterations = 1 + else: + iterations = length + + # number of samples(multiple environment steps) + for i in range(iterations): + observations.append(obs) + + ac = env.action_space.sample() + actions.append(ac) + # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) + obs, reward, done, info = env.step(ac) + + self._verify_observations(obs, env.observation_space, "step()") + self._verify_reward(reward) + self._verify_done(done) + + rewards.append(reward) + dones.append(done) + + if render: + env.render("human") + + if done: + obs = env.reset() + + assert done, "Done flag is not True after max episode length." + observations.append(obs) + env.close() + del env + return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) + + def _verify_observations(self, obs, observation_space, obs_type="reset()"): + self.assertTrue(observation_space.contains(obs), + f"Observation {obs} received from {obs_type} " + f"not contained in observation space {observation_space}.") + + def _verify_reward(self, reward): + self.assertIsInstance(reward, float, f"Returned {reward} as reward, expected float.") + + def _verify_done(self, done): + self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") + + def test_dmc_functionality(self): + """Tests that environments runs without errors using random actions.""" + for env_id in ALL_ENVS: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_dmc_determinism(self): + """Tests that identical seeds produce identical trajectories.""" + seed = 0 + # Iterate over two trajectories, which should have the same state and action sequence + for env_id in ALL_ENVS: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") + self.assertAlmostEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + + +if __name__ == '__main__': + unittest.main()