Add open ai gym environments

This commit is contained in:
Marcel 2021-07-01 14:55:14 +02:00
parent 6607d9cff9
commit 87eb093c2c
13 changed files with 227 additions and 0 deletions

View File

@ -48,6 +48,17 @@ All environments provide the full episode reward and additional information abou
[//]: |`HoleReacherDetPMP-v0`| [//]: |`HoleReacherDetPMP-v0`|
### OpenAI Gym Environments
These environments are wrapped versions of their OpenAI Gym counterparts.
|Name| Description|Horizon|Action Dimension|Context Dimension
|---|---|---|---|---|
|`ContinuousMountainCarDetPMP-v0`| A DetPmP wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1
|`ReacherDetPMP-v2`| A DetPmP wrapped version of the Reacher-v2 environment. | 50 | 2
|`FetchSlideDenseDetPMP-v1`| A DetPmP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
|`FetchReachDenseDetPMP-v1`| A DetPmP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
### Stochastic Search ### Stochastic Search
|Name| Description|Horizon|Action Dimension|Observation Dimension |Name| Description|Horizon|Action Dimension|Observation Dimension
|---|---|---|---|---| |---|---|---|---|---|

View File

@ -7,6 +7,7 @@ from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper impor
from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper from alr_envs.dmc.Ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_positional_wrapper import BallInACupPositionalWrapper
from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
# Mujoco # Mujoco
@ -560,6 +561,82 @@ register(
} }
) )
## Open AI
# MountainCarContinuous-v0 as a DetPMP environment: a single degree of freedom
# driven by the "motor" policy with the p/d gains given below.
register(
    id='ContinuousMountainCarDetPMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [
            continuous_mountain_car.PositionalWrapper,
            continuous_mountain_car.MPWrapper,
        ],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 4,
            "duration": 100,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": 1.0,
                "d_gains": 1.0,
            },
        },
    },
)
# Reacher-v2 as a DetPMP environment: two degrees of freedom driven by the
# "motor" policy with the p/d gains given below.
register(
    id='ReacherDetPMP-v2',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.mujoco:Reacher-v2",
        "wrappers": [
            reacher_v2.PositionalWrapper,
            reacher_v2.MPWrapper,
        ],
        "mp_kwargs": {
            "num_dof": 2,
            "num_basis": 6,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": 0.6,
                "d_gains": 0.075,
            },
        },
    },
)
# FetchSlideDense-v1 as a DetPMP environment: four degrees of freedom using the
# "position" policy (no policy_kwargs are passed for it).
register(
    id='FetchSlideDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.robotics:FetchSlideDense-v1",
        "wrappers": [
            fetch.PositionalWrapper,
            fetch.MPWrapper,
        ],
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position",
        },
    },
)
# FetchReachDense-v1 as a DetPMP environment: four degrees of freedom using the
# "position" policy (no policy_kwargs are passed for it).
register(
    id='FetchReachDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.robotics:FetchReachDense-v1",
        "wrappers": [
            fetch.PositionalWrapper,
            fetch.MPWrapper,
        ],
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position",
        },
    },
)
# BBO functions # BBO functions

View File

@ -0,0 +1,41 @@
from alr_envs.utils.make_env_helpers import make_env
def example_mp(env_name, seed=1):
    """
    Example for running a motion primitive based version of an OpenAI-gym environment, which is
    already registered.

    Ten random actions are sampled from the action space and applied one after another. Whenever
    the environment reports ``done``, the reward accumulated so far is printed, the accumulator
    is cleared and the environment is reset.
    For more information on motion primitive specific stuff, look at the mp examples.

    Args:
        env_name: DetPMP env_id
        seed: seed handed to ``make_env``

    Returns:
        None
    """
    # gym.make() would also work here, but the custom make_env function is the recommended way.
    env = make_env(env_name, seed)

    episode_return = 0
    env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for _ in range(10):
        action = env.action_space.sample()
        _, reward, done, _ = env.step(action)
        episode_return += reward

        if not done:
            continue

        # Episode finished: report its return and start over.
        print(episode_return)
        episode_return = 0
        env.reset()
if __name__ == '__main__':
    # DMP variants are not supported yet, so only the DetProMP environments are run.
    for detpmp_env_id in (
        "ContinuousMountainCarDetPMP-v0",
        "ReacherDetPMP-v2",
        "FetchReachDenseDetPMP-v1",
        "FetchSlideDenseDetPMP-v1",
    ):
        example_mp(detpmp_env_id)

View File

View File

@ -0,0 +1,2 @@
from alr_envs.open_ai.continuous_mountain_car.positional_wrapper import PositionalWrapper
from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper

View File

@ -0,0 +1,17 @@
from typing import Union
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """Motion primitive wrapper with a fixed time step and no known start or goal position."""

    @property
    def dt(self) -> Union[float, int]:
        """Time step used by this wrapper; always ``1.``."""
        return 1.0

    @property
    def goal_pos(self):
        """Not provided by this wrapper.

        Raises:
            ValueError: on every access.
        """
        message = "Goal position is not available and has to be learnt based on the environment."
        raise ValueError(message)

    @property
    def start_pos(self):
        """Not provided by this wrapper.

        Raises:
            ValueError: on every access.
        """
        message = "Start position is not available"
        raise ValueError(message)

View File

@ -0,0 +1,13 @@
from typing import Union
import numpy as np
from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
class PositionalWrapper(PositionalEnvWrapper):
    """Expose position and velocity taken from the ``state`` attribute as one-element arrays."""

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """Entry 0 of ``self.state`` wrapped in a one-element array."""
        position = self.state[0]
        return np.array([position])

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        """Entry 1 of ``self.state`` wrapped in a one-element array."""
        velocity = self.state[1]
        return np.array([velocity])

View File

@ -0,0 +1,2 @@
from alr_envs.open_ai.fetch.positional_wrapper import PositionalWrapper
from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper

View File

@ -0,0 +1,18 @@
from typing import Union
from gym import spaces
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """Motion primitive wrapper that starts at the initial gripper position and has no known goal."""

    @property
    def dt(self) -> Union[float, int]:
        """Time step reported by the wrapped environment."""
        wrapped_env = self.env
        return wrapped_env.dt

    @property
    def goal_pos(self):
        """Not provided by this wrapper.

        Raises:
            ValueError: on every access.
        """
        message = "Goal position is not available and has to be learnt based on the environment."
        raise ValueError(message)

    @property
    def start_pos(self):
        """Value of the ``initial_gripper_xpos`` attribute."""
        gripper_start = self.initial_gripper_xpos
        return gripper_start

View File

@ -0,0 +1,13 @@
from typing import Union
import numpy as np
from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
class PositionalWrapper(PositionalEnvWrapper):
    """Expose position and velocity as slices of the ``"observation"`` entry of ``_get_obs()``.

    ``_get_obs`` is looked up on ``self.unwrapped`` instead of on the wrapper itself:
    ``gym.Wrapper.__getattr__`` refuses to forward underscore-prefixed names and raises
    ``AttributeError`` for them, so ``self._get_obs()`` cannot be resolved through a wrapper
    chain. (Assumes ``PositionalEnvWrapper`` derives from ``gym.Wrapper`` -- TODO confirm.)
    """

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        """Entries ``[-5:-1]`` of the current observation vector."""
        # NOTE(review): presumably these four entries are the gripper velocities -- confirm
        # against the observation layout of the wrapped environment.
        return self.unwrapped._get_obs()["observation"][-5:-1]

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """Entries ``[:4]`` of the current observation vector."""
        # NOTE(review): presumably these four entries are the gripper position/state; for
        # environments whose observation also contains an object, index 3 may belong to the
        # object instead -- confirm against the observation layout.
        return self.unwrapped._get_obs()["observation"][:4]

View File

@ -0,0 +1,2 @@
from alr_envs.open_ai.reacher_v2.positional_wrapper import PositionalWrapper
from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper

View File

@ -0,0 +1,18 @@
from typing import Union
from mp_env_api.env_wrappers.mp_env_wrapper import MPEnvWrapper
class MPWrapper(MPEnvWrapper):
    """Motion primitive wrapper that exposes the goal but has no known start position."""

    @property
    def dt(self) -> Union[float, int]:
        """Time step reported by the wrapped environment."""
        wrapped_env = self.env
        return wrapped_env.dt

    @property
    def goal_pos(self):
        """Value of the ``goal`` attribute."""
        target = self.goal
        return target

    @property
    def start_pos(self):
        """Not provided by this wrapper.

        Raises:
            ValueError: on every access.
        """
        message = "Start position is not available"
        raise ValueError(message)

View File

@ -0,0 +1,13 @@
from typing import Union
import numpy as np
from mp_env_api.env_wrappers.positional_env_wrapper import PositionalEnvWrapper
class PositionalWrapper(PositionalEnvWrapper):
    """Expose the first two entries of the simulator's ``qpos`` and ``qvel`` arrays."""

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        """First two entries of ``sim.data.qpos``."""
        sim_data = self.sim.data
        return sim_data.qpos[:2]

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        """First two entries of ``sim.data.qvel``."""
        sim_data = self.sim.data
        return sim_data.qvel[:2]