Merge pull request #10 from 1nf0rmagician/dmc_integration

Add open ai gym environments
This commit is contained in:
ottofabian 2021-07-23 14:37:25 +02:00 committed by GitHub
commit 57b3a178ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 201 additions and 23 deletions

View File

@ -3,7 +3,8 @@
This repository collects custom robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.
Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md).
For stochastic search problems with a gym interface, use the `Rosenbrock-v0` reference implementation.
We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation.
We also support solving environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP; we usually only consider the mean).
When adding new DMP tasks, check the `ViaPointReacherDMP-v0` reference implementation.
When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments into DMP tasks, as sketched below.
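A registered motion primitive environment can then be created with the `make_env` helper from this repository (a minimal sketch based on the example script added in this PR):

```python
from alr_envs.utils.make_env_helpers import make_env

# Create a DetPMP-wrapped version of MountainCarContinuous-v0 (id registered in this PR).
env = make_env("ContinuousMountainCarDetPMP-v0", seed=1)
obs = env.reset()

# A single step executes the full motion primitive trajectory in the wrapped environment.
obs, reward, done, info = env.step(env.action_space.sample())
```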
## Environments
@ -48,6 +49,17 @@ All environments provide the full episode reward and additional information abou
[//]: |`HoleReacherDetPMP-v0`|
### OpenAI-gym Environments
These environments are wrapped versions of their OpenAI-gym counterparts.
|Name| Description|Horizon|Action Dimension|Context Dimension|
|---|---|---|---|---|
|`ContinuousMountainCarDetPMP-v0`| A DetPMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 | |
|`ReacherDetPMP-v2`| A DetPMP wrapped version of the Reacher-v2 environment. | 50 | 2 | |
|`FetchSlideDenseDetPMP-v1`| A DetPMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 | |
|`FetchReachDenseDetPMP-v1`| A DetPMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 | |
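Since these ids are registered with gym, `gym.make` works as well, although we recommend the custom `make_env` helper (a minimal sketch; importing `alr_envs` is assumed to trigger the `register` calls added in this PR):

```python
import gym

import alr_envs  # noqa: F401 -- importing the package registers the wrapped ids

env = gym.make("ReacherDetPMP-v2")
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
```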
### Stochastic Search
|Name| Description|Horizon|Action Dimension|Observation Dimension
|---|---|---|---|---|

View File

@ -6,6 +6,7 @@ from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import Si
from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
# Mujoco
@ -566,6 +567,83 @@ register(
    }
)

## Open AI
register(
    id='ContinuousMountainCarDetPMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.MPWrapper],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 4,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": 1.,
                "d_gains": 1.
            }
        }
    }
)

register(
    id='ReacherDetPMP-v2',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.mujoco:Reacher-v2",
        "wrappers": [reacher_v2.MPWrapper],
        "mp_kwargs": {
            "num_dof": 2,
            "num_basis": 6,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": .6,
                "d_gains": .075
            }
        }
    }
)

register(
    id='FetchSlideDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.robotics:FetchSlideDense-v1",
        "wrappers": [fetch.MPWrapper],
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position"
        }
    }
)

register(
    id='FetchReachDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.robotics:FetchReachDense-v1",
        "wrappers": [fetch.MPWrapper],
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position"
        }
    }
)
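
# Further gym tasks could be exposed following the same pattern. A hypothetical
# sketch (`pendulum.MPWrapper` and all mp_kwargs values below are illustrative
# assumptions, not part of this commit):
#
# from alr_envs.open_ai import pendulum
#
# register(
#     id='PendulumDetPMP-v0',
#     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
#     kwargs={
#         "name": "gym.envs.classic_control:Pendulum-v0",
#         "wrappers": [pendulum.MPWrapper],
#         "mp_kwargs": {
#             "num_dof": 1,
#             "num_basis": 5,
#             "duration": 2,
#             "post_traj_time": 0,
#             "width": 0.02,
#             "policy_type": "motor",
#             "policy_kwargs": {"p_gains": 1., "d_gains": 1.}
#         }
#     }
# )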
# BBO functions
for dim in [5, 10, 25, 50, 100]:

View File

@ -0,0 +1,41 @@
from alr_envs.utils.make_env_helpers import make_env


def example_mp(env_name, seed=1):
    """
    Example for running a motion primitive based version of an OpenAI-gym environment, which is already registered.
    For more information on motion primitive specific settings, have a look at the mp examples.

    Args:
        env_name: DetPMP env_id
        seed: seed

    Returns:

    """
    # While gym.make() would work here as well, we recommend our custom make_env function.
    env = make_env(env_name, seed)

    rewards = 0
    obs = env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for i in range(10):
        ac = env.action_space.sample()
        obs, reward, done, info = env.step(ac)
        rewards += reward

        if done:
            print(rewards)
            rewards = 0
            obs = env.reset()


if __name__ == '__main__':
    # DMP - not supported yet
    # example_mp("ReacherDetPMP-v2")

    # DetProMP
    example_mp("ContinuousMountainCarDetPMP-v0")
    example_mp("ReacherDetPMP-v2")
    example_mp("FetchReachDenseDetPMP-v1")
    example_mp("FetchSlideDenseDetPMP-v1")

View File

View File

@ -0,0 +1 @@
from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper

View File

@ -0,0 +1,22 @@
from typing import Union

import numpy as np

from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper


class MPWrapper(MPEnvWrapper):

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        # MountainCarContinuous-v0 state is (position, velocity)
        return np.array([self.state[1]])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return np.array([self.state[0]])

    @property
    def goal_pos(self):
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        # fixed timestep of the classic control task
        return 0.02

View File

@ -0,0 +1 @@
from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper

View File

@ -0,0 +1,22 @@
from typing import Union

import numpy as np

from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper


class MPWrapper(MPEnvWrapper):

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        # velocity entries of the flat fetch observation vector
        return self.unwrapped._get_obs()["observation"][-5:-1]

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # position entries of the flat fetch observation vector
        return self.unwrapped._get_obs()["observation"][:4]

    @property
    def goal_pos(self):
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt

View File

@ -0,0 +1 @@
from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper

View File

@ -0,0 +1,19 @@
from typing import Union

import numpy as np

from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper


class MPWrapper(MPEnvWrapper):

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        # angular velocities of the two reacher joints
        return self.sim.data.qvel[:2]

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # joint angles of the two reacher joints
        return self.sim.data.qpos[:2]

    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt

View File

@ -1,10 +0,0 @@
Metadata-Version: 1.0
Name: reacher
Version: 0.0.1
Summary: UNKNOWN
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN

View File

@ -1,7 +0,0 @@
README.md
setup.py
reacher.egg-info/PKG-INFO
reacher.egg-info/SOURCES.txt
reacher.egg-info/dependency_links.txt
reacher.egg-info/requires.txt
reacher.egg-info/top_level.txt

View File

@ -1 +0,0 @@

View File

@ -1 +0,0 @@
gym

View File

@ -1 +0,0 @@

View File

@ -3,14 +3,15 @@ from setuptools import setup
setup(
    name='alr_envs',
    version='0.0.1',
    packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.mujoco', 'alr_envs.stochastic_search',
    packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.open_ai', 'alr_envs.mujoco', 'alr_envs.stochastic_search',
              'alr_envs.utils'],
    install_requires=[
        'gym',
        'PyQt5',
        'matplotlib',
        'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
        'mujoco_py'
        'mujoco-py<2.1,>=2.0',
        'dm_control'
    ],
    url='https://github.com/ALRhub/alr_envs/',