Merge pull request #10 from 1nf0rmagician/dmc_integration
Add open ai gym environments
This commit is contained in:
commit
57b3a178ab
14
README.md
14
README.md
@ -3,7 +3,8 @@
|
|||||||
This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.
|
This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.
|
||||||
Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md).
|
Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md).
|
||||||
For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation.
|
For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation.
|
||||||
We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation.
|
We also support solving environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP; we usually only consider the mean).
|
||||||
|
When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation.
|
||||||
When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments into DMP tasks.
|
When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments into DMP tasks.
|
||||||
|
|
||||||
## Environments
|
## Environments
|
||||||
@ -48,6 +49,17 @@ All environments provide the full episode reward and additional information abou
|
|||||||
|
|
||||||
[//]: |`HoleReacherDetPMP-v0`|
|
[//]: |`HoleReacherDetPMP-v0`|
|
||||||
|
|
||||||
|
### OpenAI gym Environments
|
||||||
|
These environments are wrapped versions of their OpenAI gym counterparts.
|
||||||
|
|
||||||
|
|Name| Description|Horizon|Action Dimension|Context Dimension
|
||||||
|
|---|---|---|---|---|
|
||||||
|
|`ContinuousMountainCarDetPMP-v0`| A DetPMP wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1
|
||||||
|
|`ReacherDetPMP-v2`| A DetPMP wrapped version of the Reacher-v2 environment. | 50 | 2
|
||||||
|
|`FetchSlideDenseDetPMP-v1`| A DetPMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4
|
||||||
|
|`FetchReachDenseDetPMP-v1`| A DetPMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4
|
||||||
|
|
||||||
|
|
||||||
### Stochastic Search
|
### Stochastic Search
|
||||||
|Name| Description|Horizon|Action Dimension|Observation Dimension
|
|Name| Description|Horizon|Action Dimension|Observation Dimension
|
||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
|
@ -6,6 +6,7 @@ from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import Si
|
|||||||
from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
|
from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
|
||||||
from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
|
from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
|
||||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
|
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
|
||||||
|
from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
|
||||||
from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
|
from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
|
||||||
|
|
||||||
# Mujoco
|
# Mujoco
|
||||||
@ -566,6 +567,83 @@ register(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
## Open AI
|
||||||
|
register(
|
||||||
|
id='ContinuousMountainCarDetPMP-v0',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "gym.envs.classic_control:MountainCarContinuous-v0",
|
||||||
|
"wrappers": [continuous_mountain_car.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 1,
|
||||||
|
"num_basis": 4,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.02,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": 1.,
|
||||||
|
"d_gains": 1.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='ReacherDetPMP-v2',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "gym.envs.mujoco:Reacher-v2",
|
||||||
|
"wrappers": [reacher_v2.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 2,
|
||||||
|
"num_basis": 6,
|
||||||
|
"duration": 1,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.02,
|
||||||
|
"policy_type": "motor",
|
||||||
|
"policy_kwargs": {
|
||||||
|
"p_gains": .6,
|
||||||
|
"d_gains": .075
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='FetchSlideDenseDetPMP-v1',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "gym.envs.robotics:FetchSlideDense-v1",
|
||||||
|
"wrappers": [fetch.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 4,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.02,
|
||||||
|
"policy_type": "position"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
register(
|
||||||
|
id='FetchReachDenseDetPMP-v1',
|
||||||
|
entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
|
||||||
|
kwargs={
|
||||||
|
"name": "gym.envs.robotics:FetchReachDense-v1",
|
||||||
|
"wrappers": [fetch.MPWrapper],
|
||||||
|
"mp_kwargs": {
|
||||||
|
"num_dof": 4,
|
||||||
|
"num_basis": 5,
|
||||||
|
"duration": 2,
|
||||||
|
"post_traj_time": 0,
|
||||||
|
"width": 0.02,
|
||||||
|
"policy_type": "position"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# BBO functions
|
# BBO functions
|
||||||
|
|
||||||
for dim in [5, 10, 25, 50, 100]:
|
for dim in [5, 10, 25, 50, 100]:
|
||||||
|
41
alr_envs/examples/examples_open_ai.py
Normal file
41
alr_envs/examples/examples_open_ai.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from alr_envs.utils.make_env_helpers import make_env
|
||||||
|
|
||||||
|
|
||||||
|
def example_mp(env_name, seed=1):
|
||||||
|
"""
|
||||||
|
Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered.
|
||||||
|
For more information on motion primitive specific stuff, look at the mp examples.
|
||||||
|
Args:
|
||||||
|
env_name: DetPMP env_id
|
||||||
|
seed: seed
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
# While in this case gym.make() is possible to use as well, we recommend our custom make env function.
|
||||||
|
env = make_env(env_name, seed)
|
||||||
|
|
||||||
|
rewards = 0
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
# number of samples/full trajectories (multiple environment steps)
|
||||||
|
for i in range(10):
|
||||||
|
ac = env.action_space.sample()
|
||||||
|
obs, reward, done, info = env.step(ac)
|
||||||
|
rewards += reward
|
||||||
|
|
||||||
|
if done:
|
||||||
|
print(rewards)
|
||||||
|
rewards = 0
|
||||||
|
obs = env.reset()
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
# DMP - not supported yet
|
||||||
|
#example_mp("ReacherDetPMP-v2")
|
||||||
|
|
||||||
|
# DetProMP
|
||||||
|
example_mp("ContinuousMountainCarDetPMP-v0")
|
||||||
|
example_mp("ReacherDetPMP-v2")
|
||||||
|
example_mp("FetchReachDenseDetPMP-v1")
|
||||||
|
example_mp("FetchSlideDenseDetPMP-v1")
|
||||||
|
|
0
alr_envs/open_ai/__init__.py
Normal file
0
alr_envs/open_ai/__init__.py
Normal file
1
alr_envs/open_ai/continuous_mountain_car/__init__.py
Normal file
1
alr_envs/open_ai/continuous_mountain_car/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper
|
22
alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
Normal file
22
alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
from typing import Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return np.array([self.state[1]])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return np.array([self.state[0]])
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self):
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return 0.02
|
1
alr_envs/open_ai/fetch/__init__.py
Normal file
1
alr_envs/open_ai/fetch/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper
|
22
alr_envs/open_ai/fetch/mp_wrapper.py
Normal file
22
alr_envs/open_ai/fetch/mp_wrapper.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
from typing import Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.unwrapped._get_obs()["observation"][-5:-1]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.unwrapped._get_obs()["observation"][:4]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def goal_pos(self):
|
||||||
|
raise ValueError("Goal position is not available and has to be learnt based on the environment.")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
1
alr_envs/open_ai/reacher_v2/__init__.py
Normal file
1
alr_envs/open_ai/reacher_v2/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper
|
19
alr_envs/open_ai/reacher_v2/mp_wrapper.py
Normal file
19
alr_envs/open_ai/reacher_v2/mp_wrapper.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
from typing import Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
|
class MPWrapper(MPEnvWrapper):
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_vel(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.sim.data.qvel[:2]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def current_pos(self) -> Union[float, int, np.ndarray]:
|
||||||
|
return self.sim.data.qpos[:2]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dt(self) -> Union[float, int]:
|
||||||
|
return self.env.dt
|
@ -1,10 +0,0 @@
|
|||||||
Metadata-Version: 1.0
|
|
||||||
Name: reacher
|
|
||||||
Version: 0.0.1
|
|
||||||
Summary: UNKNOWN
|
|
||||||
Home-page: UNKNOWN
|
|
||||||
Author: UNKNOWN
|
|
||||||
Author-email: UNKNOWN
|
|
||||||
License: UNKNOWN
|
|
||||||
Description: UNKNOWN
|
|
||||||
Platform: UNKNOWN
|
|
@ -1,7 +0,0 @@
|
|||||||
README.md
|
|
||||||
setup.py
|
|
||||||
reacher.egg-info/PKG-INFO
|
|
||||||
reacher.egg-info/SOURCES.txt
|
|
||||||
reacher.egg-info/dependency_links.txt
|
|
||||||
reacher.egg-info/requires.txt
|
|
||||||
reacher.egg-info/top_level.txt
|
|
@ -1 +0,0 @@
|
|||||||
|
|
@ -1 +0,0 @@
|
|||||||
gym
|
|
@ -1 +0,0 @@
|
|||||||
|
|
5
setup.py
5
setup.py
@ -3,14 +3,15 @@ from setuptools import setup
|
|||||||
setup(
|
setup(
|
||||||
name='alr_envs',
|
name='alr_envs',
|
||||||
version='0.0.1',
|
version='0.0.1',
|
||||||
packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.mujoco', 'alr_envs.stochastic_search',
|
packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.open_ai', 'alr_envs.mujoco', 'alr_envs.stochastic_search',
|
||||||
'alr_envs.utils'],
|
'alr_envs.utils'],
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'gym',
|
'gym',
|
||||||
'PyQt5',
|
'PyQt5',
|
||||||
'matplotlib',
|
'matplotlib',
|
||||||
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
|
||||||
'mujoco_py'
|
'mujoco-py<2.1,>=2.0',
|
||||||
|
'dm_control'
|
||||||
],
|
],
|
||||||
|
|
||||||
url='https://github.com/ALRhub/alr_envs/',
|
url='https://github.com/ALRhub/alr_envs/',
|
||||||
|
Loading…
Reference in New Issue
Block a user