Add open ai gym environments

2021-07-01 14:55:14 +02:00 · 2021-07-01 14:55:14 +02:00 · 87eb093c2c
commit 87eb093c2c
parent 6607d9cff9
13 changed files with 227 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou
 [//]:  |`HoleReacherDetPMP-v0`|
 ### OpenAi-gym Environments
 These environments are wrapped versions of their OpenAI Gym counterparts.
 |Name| Description|Horizon|Action Dimension|Context Dimension
 |---|---|---|---|---|
 |`ContinuousMountainCarDetPMP-v0`| A DetPMP wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1
 |`ReacherDetPMP-v2`| A DetPMP wrapped version of the Reacher-v2 environment. | 50 | 2
 |`FetchSlideDenseDetPMP-v1`| A DetPMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
 |`FetchReachDenseDetPMP-v1`| A DetPMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
 ### Stochastic Search
 |Name| Description|Horizon|Action Dimension|Observation Dimension
 |---|---|---|---|---|
--- a/alr_envs/init.py
+++ b/alr_envs/init.py
@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor
 from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
 from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
 from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
 # Mujoco
@ -560,6 +561,82 @@ register(
    }
 )
 ## Open AI
 # Register the id 'ContinuousMountainCarDetPMP-v0'. The environment is built by
 # `make_detpmp_env_helper`, which is handed the kwargs below: the base gym
 # environment to create, the wrapper classes to apply, and the motion
 # primitive settings.
 register(
    id='ContinuousMountainCarDetPMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        # Base environment, addressed as "<module>:<env id>".
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.PositionalWrapper, continuous_mountain_car.MPWrapper],
        # NOTE(review): the meaning of the individual mp_kwargs is defined by the
        # motion primitive implementation behind the helper, not here - confirm there.
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 4,
            "duration": 100,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            # Gains for the "motor" policy type.
            "policy_kwargs": {
                "p_gains": 1.,
                "d_gains": 1.
            }
        }
    }
 )
 # Register the id 'ReacherDetPMP-v2'. The environment is built by
 # `make_detpmp_env_helper` from gym's Reacher-v2, the reacher_v2 wrapper
 # classes and the motion primitive settings below.
 register(
    id='ReacherDetPMP-v2',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        # Base environment, addressed as "<module>:<env id>".
        "name": "gym.envs.mujoco:Reacher-v2",
        "wrappers": [reacher_v2.PositionalWrapper, reacher_v2.MPWrapper],
        # NOTE(review): the meaning of the individual mp_kwargs is defined by the
        # motion primitive implementation behind the helper, not here - confirm there.
        "mp_kwargs": {
            "num_dof": 2,
            "num_basis": 6,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            # Gains for the "motor" policy type.
            "policy_kwargs": {
                "p_gains": .6,
                "d_gains": .075
            }
        }
    }
 )
 # Register the id 'FetchSlideDenseDetPMP-v1'. The environment is built by
 # `make_detpmp_env_helper` from gym's FetchSlideDense-v1, the fetch wrapper
 # classes and the motion primitive settings below.
 # NOTE(review): the README lists a horizon of 50 for this environment; confirm
 # that `duration` combined with the wrapped env's dt actually yields 50 steps.
 register(
    id='FetchSlideDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        # Base environment, addressed as "<module>:<env id>".
        "name": "gym.envs.robotics:FetchSlideDense-v1",
        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
        # Unlike the "motor" registrations, the "position" policy type is
        # configured without policy_kwargs.
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position"
        }
    }
 )
 # Register the id 'FetchReachDenseDetPMP-v1'. The environment is built by
 # `make_detpmp_env_helper` from gym's FetchReachDense-v1, the fetch wrapper
 # classes and the motion primitive settings below.
 # NOTE(review): the README lists a horizon of 50 for this environment; confirm
 # that `duration` combined with the wrapped env's dt actually yields 50 steps.
 register(
    id='FetchReachDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        # Base environment, addressed as "<module>:<env id>".
        "name": "gym.envs.robotics:FetchReachDense-v1",
        "wrappers": [fetch.PositionalWrapper, fetch.MPWrapper],
        # Unlike the "motor" registrations, the "position" policy type is
        # configured without policy_kwargs.
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position"
        }
    }
 )
 # BBO functions
--- a/alr_envs/examples/examples_open_ai.py
+++ b/alr_envs/examples/examples_open_ai.py
@ -0,0 +1,41 @@
 from alr_envs.utils.make_env_helpers import make_env
 def example_mp(env_name, seed=1):
    """
    Run an already registered motion primitive version of an OpenAI gym environment with random actions.

    Ten actions are sampled from the action space and stepped one after another. Rewards are
    accumulated, and whenever a step reports `done` the accumulated reward is printed and the
    environment is reset. For motion primitive specifics, see the mp examples.

    Args:
        env_name: DetPMP env_id
        seed: seed passed on to `make_env`

    Returns:
        None
    """
    # gym.make() would also work here, but the custom make_env function is the recommended way.
    env = make_env(env_name, seed)
    accumulated_reward = 0
    env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for _ in range(10):
        sampled_action = env.action_space.sample()
        _, step_reward, finished, _ = env.step(sampled_action)
        accumulated_reward += step_reward

        if not finished:
            continue

        # Episode finished: report the return and start over.
        print(accumulated_reward)
        accumulated_reward = 0
        env.reset()
 if __name__ == '__main__':
    # DMP variants are not supported yet, so only the DetProMP environments are run.
    detpmp_env_ids = (
        "ContinuousMountainCarDetPMP-v0",
        "ReacherDetPMP-v2",
        "FetchReachDenseDetPMP-v1",
        "FetchSlideDenseDetPMP-v1",
    )
    for registered_id in detpmp_env_ids:
        example_mp(registered_id)
--- a/alr_envs/open_ai/init.py
+++ b/alr_envs/open_ai/init.py
--- a/alr_envs/open_ai/continuous_mountain_car/init.py
+++ b/alr_envs/open_ai/continuous_mountain_car/init.py
@ -0,0 +1,2 @@
 from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper
 from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper
--- a/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
+++ b/alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
@ -0,0 +1,17 @@
 from typing import Union
 from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    """
    Motion primitive wrapper for the continuous mountain car environment.

    Neither a start nor a goal position is exposed; accessing either property raises.
    """

    @property
    def dt(self) -> Union[float, int]:
        """Return the constant ``1.`` as the ``dt`` of this wrapper."""
        return 1.

    @property
    def goal_pos(self):
        """Always raise ``ValueError``; no goal position is provided."""
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def start_pos(self):
        """Always raise ``ValueError``; no start position is provided."""
        raise ValueError("Start position is not available")
--- a/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
+++ b/alr_envs/open_ai/continuous_mountain_car/positional_wrapper.py
@ -0,0 +1,13 @@
 from typing import Union
 import numpy as np
 from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
 class PositionalWrapper(PositionalEnvWrapper):
    """Expose position and velocity of the continuous mountain car, both read from ``self.state``."""

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """Return entry 0 of ``self.state`` as a one-element array."""
        position = self.state[0]
        return np.array([position])

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        """Return entry 1 of ``self.state`` as a one-element array."""
        velocity = self.state[1]
        return np.array([velocity])
--- a/alr_envs/open_ai/fetch/init.py
+++ b/alr_envs/open_ai/fetch/init.py
@ -0,0 +1,2 @@
 from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper
 from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper
--- a/alr_envs/open_ai/fetch/mp_wrapper.py
+++ b/alr_envs/open_ai/fetch/mp_wrapper.py
@ -0,0 +1,18 @@
 from typing import Union
 from gym import spaces
 from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    """
    Motion primitive wrapper for the Fetch environments.

    The start position and ``dt`` are taken from the wrapped environment; a goal position is not exposed.
    """

    @property
    def dt(self) -> Union[float, int]:
        """Return the ``dt`` of the wrapped environment."""
        return self.env.dt

    @property
    def goal_pos(self):
        """Always raise ``ValueError``; no goal position is provided."""
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def start_pos(self):
        """Return the ``initial_gripper_xpos`` attribute.

        NOTE(review): the Fetch registrations use ``num_dof=4``; confirm that the shape of
        ``initial_gripper_xpos`` matches what the motion primitive expects.
        """
        return self.initial_gripper_xpos
--- a/alr_envs/open_ai/fetch/positional_wrapper.py
+++ b/alr_envs/open_ai/fetch/positional_wrapper.py
@ -0,0 +1,13 @@
 from typing import Union
 import numpy as np
 from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
 class PositionalWrapper(PositionalEnvWrapper):
    """
    Expose position and velocity for the Fetch environments as slices of the "observation" vector.

    NOTE(review): which physical quantities these slices contain depends on the layout of the
    wrapped environment's observation vector (which may differ between Fetch tasks) - confirm
    that both slices line up with the 4 controlled DoF. Also confirm that the private
    ``_get_obs`` is reachable through the wrapper chain.
    """

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """Return the first four entries of the "observation" vector."""
        observation = self._get_obs()["observation"]
        return observation[:4]

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        """Return the four entries preceding the last entry of the "observation" vector."""
        observation = self._get_obs()["observation"]
        return observation[-5:-1]
--- a/alr_envs/open_ai/reacher_v2/init.py
+++ b/alr_envs/open_ai/reacher_v2/init.py
@ -0,0 +1,2 @@
 from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper
 from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper
--- a/alr_envs/open_ai/reacher_v2/mp_wrapper.py
+++ b/alr_envs/open_ai/reacher_v2/mp_wrapper.py
@ -0,0 +1,18 @@
 from typing import Union
 from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
 class MPWrapper(MPEnvWrapper):
    """
    Motion primitive wrapper for the Reacher-v2 environment.

    The goal and ``dt`` are taken from the wrapped environment; a start position is not exposed.
    """

    @property
    def dt(self) -> Union[float, int]:
        """Return the ``dt`` of the wrapped environment."""
        return self.env.dt

    @property
    def goal_pos(self):
        """Return the ``goal`` attribute.

        NOTE(review): confirm that ``goal`` is expressed in the same coordinates as the
        positions the motion primitive controls.
        """
        return self.goal

    @property
    def start_pos(self):
        """Always raise ``ValueError``; no start position is provided."""
        raise ValueError("Start position is not available")
--- a/alr_envs/open_ai/reacher_v2/positional_wrapper.py
+++ b/alr_envs/open_ai/reacher_v2/positional_wrapper.py
@ -0,0 +1,13 @@
 from typing import Union
 import numpy as np
 from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
 class PositionalWrapper(PositionalEnvWrapper):
    """Expose position and velocity for Reacher-v2 from the first two simulator state entries."""

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """Return the first two entries of ``self.sim.data.qpos``."""
        all_positions = self.sim.data.qpos
        return all_positions[:2]

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        """Return the first two entries of ``self.sim.data.qvel``."""
        all_velocities = self.sim.data.qvel
        return all_velocities[:2]
		`@ -0,0 +1,2 @@`
							`from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper`
							`from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper`
		`@ -0,0 +1,2 @@`
							`from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper`
							`from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper`
		`@ -0,0 +1,2 @@`
							`from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper`
							`from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper`