Merge pull request #10 from 1nf0rmagician/dmc_integration

Add open ai gym environments
This commit is contained in:
ottofabian 2021-07-23 14:37:25 +02:00 committed by GitHub
commit 57b3a178ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 201 additions and 23 deletions

View File

@ -3,7 +3,8 @@
This repository collects custom robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.
Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md).
For stochastic search problems with a gym interface, use the `Rosenbrock-v0` reference implementation.
We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation.
We also support solving environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP; we usually only consider the mean).
When adding new DMP tasks, check the `ViaPointReacherDMP-v0` reference implementation.
When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments into DMP tasks, as sketched below.
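A registered motion primitive environment can then be created with the `make_env` helper from this repository (a minimal sketch based on the example script added in this PR):

```python
from alr_envs.utils.make_env_helpers import make_env

# Create a DetPMP-wrapped version of MountainCarContinuous-v0 (id registered in this PR).
env = make_env("ContinuousMountainCarDetPMP-v0", seed=1)
obs = env.reset()

# A single step executes the full motion primitive trajectory in the wrapped environment.
obs, reward, done, info = env.step(env.action_space.sample())
```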
## Environments
@ -48,6 +49,17 @@ All environments provide the full episode reward and additional information abou
[//]: |`HoleReacherDetPMP-v0`|
### OpenAI-gym Environments
These environments are wrapped versions of their OpenAI-gym counterparts.
|Name| Description|Horizon|Action Dimension|Context Dimension|
|---|---|---|---|---|
|`ContinuousMountainCarDetPMP-v0`| A DetPMP wrapped version of the ContinuousMountainCar-v0 environment. | 100 | 1 | |
|`ReacherDetPMP-v2`| A DetPMP wrapped version of the Reacher-v2 environment. | 50 | 2 | |
|`FetchSlideDenseDetPMP-v1`| A DetPMP wrapped version of the FetchSlideDense-v1 environment. | 50 | 4 | |
|`FetchReachDenseDetPMP-v1`| A DetPMP wrapped version of the FetchReachDense-v1 environment. | 50 | 4 | |
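Since these ids are registered with gym, `gym.make` works as well, although we recommend the custom `make_env` helper (a minimal sketch; importing `alr_envs` is assumed to trigger the `register` calls added in this PR):

```python
import gym

import alr_envs  # noqa: F401 -- importing the package registers the wrapped ids

env = gym.make("ReacherDetPMP-v2")
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
```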
### Stochastic Search
|Name| Description|Horizon|Action Dimension|Observation Dimension
|---|---|---|---|---|

View File

@ -6,6 +6,7 @@ from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import Si
from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper
from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper
from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch
from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock
# Mujoco
@ -566,6 +567,83 @@ register(
    }
)

## Open AI
register(
    id='ContinuousMountainCarDetPMP-v0',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.classic_control:MountainCarContinuous-v0",
        "wrappers": [continuous_mountain_car.MPWrapper],
        "mp_kwargs": {
            "num_dof": 1,
            "num_basis": 4,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": 1.,
                "d_gains": 1.
            }
        }
    }
)

register(
    id='ReacherDetPMP-v2',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.mujoco:Reacher-v2",
        "wrappers": [reacher_v2.MPWrapper],
        "mp_kwargs": {
            "num_dof": 2,
            "num_basis": 6,
            "duration": 1,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "motor",
            "policy_kwargs": {
                "p_gains": .6,
                "d_gains": .075
            }
        }
    }
)

register(
    id='FetchSlideDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.robotics:FetchSlideDense-v1",
        "wrappers": [fetch.MPWrapper],
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position"
        }
    }
)

register(
    id='FetchReachDenseDetPMP-v1',
    entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
    kwargs={
        "name": "gym.envs.robotics:FetchReachDense-v1",
        "wrappers": [fetch.MPWrapper],
        "mp_kwargs": {
            "num_dof": 4,
            "num_basis": 5,
            "duration": 2,
            "post_traj_time": 0,
            "width": 0.02,
            "policy_type": "position"
        }
    }
)
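
# Further gym tasks could be exposed following the same pattern. A hypothetical
# sketch (`pendulum.MPWrapper` and all mp_kwargs values below are illustrative
# assumptions, not part of this commit):
#
# from alr_envs.open_ai import pendulum
#
# register(
#     id='PendulumDetPMP-v0',
#     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper',
#     kwargs={
#         "name": "gym.envs.classic_control:Pendulum-v0",
#         "wrappers": [pendulum.MPWrapper],
#         "mp_kwargs": {
#             "num_dof": 1,
#             "num_basis": 5,
#             "duration": 2,
#             "post_traj_time": 0,
#             "width": 0.02,
#             "policy_type": "motor",
#             "policy_kwargs": {"p_gains": 1., "d_gains": 1.}
#         }
#     }
# )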
# BBO functions
for dim in [5, 10, 25, 50, 100]:

View File

@ -0,0 +1,41 @@
from alr_envs.utils.make_env_helpers import make_env


def example_mp(env_name, seed=1):
    """
    Example for running a motion primitive based version of an OpenAI-gym environment, which is already registered.
    For more information on motion primitive specific settings, have a look at the mp examples.

    Args:
        env_name: DetPMP env_id
        seed: seed

    Returns:

    """
    # While gym.make() would work here as well, we recommend our custom make_env function.
    env = make_env(env_name, seed)

    rewards = 0
    obs = env.reset()

    # number of samples/full trajectories (multiple environment steps)
    for i in range(10):
        ac = env.action_space.sample()
        obs, reward, done, info = env.step(ac)
        rewards += reward

        if done:
            print(rewards)
            rewards = 0
            obs = env.reset()


if __name__ == '__main__':
    # DMP - not supported yet
    # example_mp("ReacherDetPMP-v2")

    # DetProMP
    example_mp("ContinuousMountainCarDetPMP-v0")
    example_mp("ReacherDetPMP-v2")
    example_mp("FetchReachDenseDetPMP-v1")
    example_mp("FetchSlideDenseDetPMP-v1")

View File

View File

@ -0,0 +1 @@
from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper

View File

@ -0,0 +1,22 @@
from typing import Union

import numpy as np

from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper


class MPWrapper(MPEnvWrapper):

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        # MountainCarContinuous-v0 state is (position, velocity)
        return np.array([self.state[1]])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        return np.array([self.state[0]])

    @property
    def goal_pos(self):
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        # fixed timestep of the classic control task
        return 0.02

View File

@ -0,0 +1 @@
from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper

View File

@ -0,0 +1,22 @@
from typing import Union

import numpy as np

from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper


class MPWrapper(MPEnvWrapper):

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        # velocity entries of the flat fetch observation vector
        return self.unwrapped._get_obs()["observation"][-5:-1]

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # position entries of the flat fetch observation vector
        return self.unwrapped._get_obs()["observation"][:4]

    @property
    def goal_pos(self):
        raise ValueError("Goal position is not available and has to be learnt based on the environment.")

    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt

View File

@ -0,0 +1 @@
from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper

View File

@ -0,0 +1,19 @@
from typing import Union

import numpy as np

from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper


class MPWrapper(MPEnvWrapper):

    @property
    def current_vel(self) -> Union[float, int, np.ndarray]:
        # angular velocities of the two reacher joints
        return self.sim.data.qvel[:2]

    @property
    def current_pos(self) -> Union[float, int, np.ndarray]:
        # joint angles of the two reacher joints
        return self.sim.data.qpos[:2]

    @property
    def dt(self) -> Union[float, int]:
        return self.env.dt

View File

@ -1,10 +0,0 @@
Metadata-Version: 1.0
Name: reacher
Version: 0.0.1
Summary: UNKNOWN
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN

View File

@ -1,7 +0,0 @@
README.md
setup.py
reacher.egg-info/PKG-INFO
reacher.egg-info/SOURCES.txt
reacher.egg-info/dependency_links.txt
reacher.egg-info/requires.txt
reacher.egg-info/top_level.txt

View File

@ -1 +0,0 @@

View File

@ -1 +0,0 @@
gym

View File

@ -1 +0,0 @@

View File

@ -3,14 +3,15 @@ from setuptools import setup
setup(
    name='alr_envs',
    version='0.0.1',
    packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.mujoco', 'alr_envs.stochastic_search',
    packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.open_ai', 'alr_envs.mujoco', 'alr_envs.stochastic_search',
              'alr_envs.utils'],
    install_requires=[
        'gym',
        'PyQt5',
        'matplotlib',
        'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git',
        'mujoco_py'
        'mujoco-py<2.1,>=2.0',
        'dm_control'
    ],
    url='https://github.com/ALRhub/alr_envs/',