Merge pull request #10 from 1nf0rmagician/dmc_integration
Add open ai gym environments
This commit is contained in:
		
						commit
						57b3a178ab
					
				
							
								
								
									
										14
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								README.md
									
									
									
									
									
								
							| @ -3,7 +3,8 @@ | ||||
| This repository collects custom Robotics environments not included in benchmark suites like OpenAI gym, rllab, etc.  | ||||
| Creating a custom (Mujoco) gym environment can be done according to [this guide](https://github.com/openai/gym/blob/master/docs/creating-environments.md). | ||||
| For stochastic search problems with gym interface use the `Rosenbrock-v0` reference implementation. | ||||
| We also support to solve environments with DMPs. When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. | ||||
| We also support solving environments with Dynamic Movement Primitives (DMPs) and Probabilistic Movement Primitives (DetPMP; we usually consider only the mean).  | ||||
| When adding new DMP tasks check the `ViaPointReacherDMP-v0` reference implementation. | ||||
| When simply using the tasks, you can also leverage the wrapper class `DmpWrapper` to turn normal gym environments into DMP tasks. | ||||
| 
 | ||||
| ## Environments | ||||
| @ -48,6 +49,17 @@ All environments provide the full episode reward and additional information abou | ||||
| 
 | ||||
| [//]:  |`HoleReacherDetPMP-v0`| | ||||
| 
 | ||||
| ### OpenAi-gym Environments | ||||
| These environments are wrapped versions of their OpenAI Gym counterparts. | ||||
| 
 | ||||
| |Name| Description|Horizon|Action Dimension|Context Dimension | ||||
| |---|---|---|---|---| | ||||
| |`ContinuousMountainCarDetPMP-v0`| A DetPMP-wrapped version of the MountainCarContinuous-v0 environment. | 100 | 1 | ||||
| |`ReacherDetPMP-v2`| A DetPMP-wrapped version of the Reacher-v2 environment. | 50 | 2 | ||||
| |`FetchSlideDenseDetPMP-v1`| A DetPMP-wrapped version of the FetchSlideDense-v1 environment. | 50 | 4  | ||||
| |`FetchReachDenseDetPMP-v1`| A DetPMP-wrapped version of the FetchReachDense-v1 environment. | 50 | 4 | ||||
| 
 | ||||
| 
 | ||||
| ### Stochastic Search | ||||
| |Name| Description|Horizon|Action Dimension|Observation Dimension | ||||
| |---|---|---|---|---| | ||||
|  | ||||
| @ -6,6 +6,7 @@ from alr_envs.classic_control.simple_reacher.simple_reacher_mp_wrapper import Si | ||||
| from alr_envs.classic_control.viapoint_reacher.viapoint_reacher_mp_wrapper import ViaPointReacherMPWrapper | ||||
| from alr_envs.dmc.ball_in_cup.ball_in_the_cup_mp_wrapper import DMCBallInCupMPWrapper | ||||
| from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_mp_wrapper import BallInACupMPWrapper | ||||
| from alr_envs.open_ai import reacher_v2, continuous_mountain_car, fetch | ||||
| from alr_envs.stochastic_search.functions.f_rosenbrock import Rosenbrock | ||||
| 
 | ||||
| # Mujoco | ||||
| @ -566,6 +567,83 @@ register( | ||||
|     } | ||||
| ) | ||||
| 
 | ||||
| ## Open AI | ||||
| register( | ||||
|     id='ContinuousMountainCarDetPMP-v0', | ||||
|     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', | ||||
|     kwargs={ | ||||
|         "name": "gym.envs.classic_control:MountainCarContinuous-v0", | ||||
|         "wrappers": [continuous_mountain_car.MPWrapper], | ||||
|         "mp_kwargs": { | ||||
|             "num_dof": 1, | ||||
|             "num_basis": 4, | ||||
|             "duration": 2, | ||||
|             "post_traj_time": 0, | ||||
|             "width": 0.02, | ||||
|             "policy_type": "motor", | ||||
|             "policy_kwargs": { | ||||
|                 "p_gains": 1., | ||||
|                 "d_gains": 1. | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| ) | ||||
| 
 | ||||
| register( | ||||
|     id='ReacherDetPMP-v2', | ||||
|     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', | ||||
|     kwargs={ | ||||
|         "name": "gym.envs.mujoco:Reacher-v2", | ||||
|         "wrappers": [reacher_v2.MPWrapper], | ||||
|         "mp_kwargs": { | ||||
|             "num_dof": 2, | ||||
|             "num_basis": 6, | ||||
|             "duration": 1, | ||||
|             "post_traj_time": 0, | ||||
|             "width": 0.02, | ||||
|             "policy_type": "motor", | ||||
|             "policy_kwargs": { | ||||
|                 "p_gains": .6, | ||||
|                 "d_gains": .075 | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| ) | ||||
| 
 | ||||
| register( | ||||
|     id='FetchSlideDenseDetPMP-v1', | ||||
|     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', | ||||
|     kwargs={ | ||||
|         "name": "gym.envs.robotics:FetchSlideDense-v1", | ||||
|         "wrappers": [fetch.MPWrapper], | ||||
|         "mp_kwargs": { | ||||
|             "num_dof": 4, | ||||
|             "num_basis": 5, | ||||
|             "duration": 2, | ||||
|             "post_traj_time": 0, | ||||
|             "width": 0.02, | ||||
|             "policy_type": "position" | ||||
|         } | ||||
|     } | ||||
| ) | ||||
| 
 | ||||
| register( | ||||
|     id='FetchReachDenseDetPMP-v1', | ||||
|     entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env_helper', | ||||
|     kwargs={ | ||||
|         "name": "gym.envs.robotics:FetchReachDense-v1", | ||||
|         "wrappers": [fetch.MPWrapper], | ||||
|         "mp_kwargs": { | ||||
|             "num_dof": 4, | ||||
|             "num_basis": 5, | ||||
|             "duration": 2, | ||||
|             "post_traj_time": 0, | ||||
|             "width": 0.02, | ||||
|             "policy_type": "position" | ||||
|         } | ||||
|     } | ||||
| ) | ||||
| 
 | ||||
| # BBO functions | ||||
| 
 | ||||
| for dim in [5, 10, 25, 50, 100]: | ||||
|  | ||||
							
								
								
									
										41
									
								
								alr_envs/examples/examples_open_ai.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								alr_envs/examples/examples_open_ai.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,41 @@ | ||||
| from alr_envs.utils.make_env_helpers import make_env | ||||
| 
 | ||||
| 
 | ||||
| def example_mp(env_name, seed=1): | ||||
|     """ | ||||
|     Example for running a motion primitive based version of a OpenAI-gym environment, which is already registered. | ||||
|     For more information on motion primitive specific stuff, look at the mp examples. | ||||
|     Args: | ||||
|         env_name: DetPMP env_id | ||||
|         seed: seed | ||||
| 
 | ||||
|     Returns: | ||||
| 
 | ||||
|     """ | ||||
|     # While in this case gym.make() is possible to use as well, we recommend our custom make env function. | ||||
|     env = make_env(env_name, seed) | ||||
| 
 | ||||
|     rewards = 0 | ||||
|     obs = env.reset() | ||||
| 
 | ||||
|     # number of samples/full trajectories (multiple environment steps) | ||||
|     for i in range(10): | ||||
|         ac = env.action_space.sample() | ||||
|         obs, reward, done, info = env.step(ac) | ||||
|         rewards += reward | ||||
| 
 | ||||
|         if done: | ||||
|             print(rewards) | ||||
|             rewards = 0 | ||||
|             obs = env.reset() | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     # DMP - not supported yet | ||||
|     #example_mp("ReacherDetPMP-v2") | ||||
| 
 | ||||
|     # DetProMP | ||||
|     example_mp("ContinuousMountainCarDetPMP-v0") | ||||
|     example_mp("ReacherDetPMP-v2") | ||||
|     example_mp("FetchReachDenseDetPMP-v1") | ||||
|     example_mp("FetchSlideDenseDetPMP-v1") | ||||
| 
 | ||||
							
								
								
									
										0
									
								
								alr_envs/open_ai/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								alr_envs/open_ai/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										1
									
								
								alr_envs/open_ai/continuous_mountain_car/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								alr_envs/open_ai/continuous_mountain_car/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| from alr_envs.open_ai.continuous_mountain_car.mp_wrapper import MPWrapper | ||||
							
								
								
									
										22
									
								
								alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								alr_envs/open_ai/continuous_mountain_car/mp_wrapper.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | ||||
| from typing import Union | ||||
| 
 | ||||
| import numpy as np | ||||
| from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper | ||||
| 
 | ||||
| 
 | ||||
| class MPWrapper(MPEnvWrapper): | ||||
|     @property | ||||
|     def current_vel(self) -> Union[float, int, np.ndarray]: | ||||
|         return np.array([self.state[1]]) | ||||
| 
 | ||||
|     @property | ||||
|     def current_pos(self) -> Union[float, int, np.ndarray]: | ||||
|         return np.array([self.state[0]]) | ||||
| 
 | ||||
|     @property | ||||
|     def goal_pos(self): | ||||
|         raise ValueError("Goal position is not available and has to be learnt based on the environment.") | ||||
| 
 | ||||
|     @property | ||||
|     def dt(self) -> Union[float, int]: | ||||
|         return 0.02 | ||||
							
								
								
									
										1
									
								
								alr_envs/open_ai/fetch/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								alr_envs/open_ai/fetch/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| from alr_envs.open_ai.fetch.mp_wrapper import MPWrapper | ||||
							
								
								
									
										22
									
								
								alr_envs/open_ai/fetch/mp_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								alr_envs/open_ai/fetch/mp_wrapper.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | ||||
| from typing import Union | ||||
| 
 | ||||
| import numpy as np | ||||
| from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper | ||||
| 
 | ||||
| 
 | ||||
| class MPWrapper(MPEnvWrapper): | ||||
|     @property | ||||
|     def current_vel(self) -> Union[float, int, np.ndarray]: | ||||
|         return self.unwrapped._get_obs()["observation"][-5:-1] | ||||
| 
 | ||||
|     @property | ||||
|     def current_pos(self) -> Union[float, int, np.ndarray]: | ||||
|         return self.unwrapped._get_obs()["observation"][:4] | ||||
| 
 | ||||
|     @property | ||||
|     def goal_pos(self): | ||||
|         raise ValueError("Goal position is not available and has to be learnt based on the environment.") | ||||
| 
 | ||||
|     @property | ||||
|     def dt(self) -> Union[float, int]: | ||||
|         return self.env.dt | ||||
							
								
								
									
										1
									
								
								alr_envs/open_ai/reacher_v2/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								alr_envs/open_ai/reacher_v2/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| from alr_envs.open_ai.reacher_v2.mp_wrapper import MPWrapper | ||||
							
								
								
									
										19
									
								
								alr_envs/open_ai/reacher_v2/mp_wrapper.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								alr_envs/open_ai/reacher_v2/mp_wrapper.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
| from typing import Union | ||||
| 
 | ||||
| import numpy as np | ||||
| from mp_env_api.interface_wrappers.mp_env_wrapper import MPEnvWrapper | ||||
| 
 | ||||
| 
 | ||||
| class MPWrapper(MPEnvWrapper): | ||||
| 
 | ||||
|     @property | ||||
|     def current_vel(self) -> Union[float, int, np.ndarray]: | ||||
|         return self.sim.data.qvel[:2] | ||||
| 
 | ||||
|     @property | ||||
|     def current_pos(self) -> Union[float, int, np.ndarray]: | ||||
|         return self.sim.data.qpos[:2] | ||||
| 
 | ||||
|     @property | ||||
|     def dt(self) -> Union[float, int]: | ||||
|         return self.env.dt | ||||
| @ -1,10 +0,0 @@ | ||||
| Metadata-Version: 1.0 | ||||
| Name: reacher | ||||
| Version: 0.0.1 | ||||
| Summary: UNKNOWN | ||||
| Home-page: UNKNOWN | ||||
| Author: UNKNOWN | ||||
| Author-email: UNKNOWN | ||||
| License: UNKNOWN | ||||
| Description: UNKNOWN | ||||
| Platform: UNKNOWN | ||||
| @ -1,7 +0,0 @@ | ||||
| README.md | ||||
| setup.py | ||||
| reacher.egg-info/PKG-INFO | ||||
| reacher.egg-info/SOURCES.txt | ||||
| reacher.egg-info/dependency_links.txt | ||||
| reacher.egg-info/requires.txt | ||||
| reacher.egg-info/top_level.txt | ||||
| @ -1 +0,0 @@ | ||||
| 
 | ||||
| @ -1 +0,0 @@ | ||||
| gym | ||||
| @ -1 +0,0 @@ | ||||
| 
 | ||||
							
								
								
									
										5
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								setup.py
									
									
									
									
									
								
							| @ -3,14 +3,15 @@ from setuptools import setup | ||||
| setup( | ||||
|     name='alr_envs', | ||||
|     version='0.0.1', | ||||
|     packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.mujoco', 'alr_envs.stochastic_search', | ||||
|     packages=['alr_envs', 'alr_envs.classic_control', 'alr_envs.open_ai', 'alr_envs.mujoco', 'alr_envs.stochastic_search', | ||||
|               'alr_envs.utils'], | ||||
|     install_requires=[ | ||||
|         'gym', | ||||
|         'PyQt5', | ||||
|         'matplotlib', | ||||
|         'mp_env_api @ git+ssh://git@github.com/ALRhub/motion_primitive_env_api.git', | ||||
|         'mujoco_py' | ||||
|         'mujoco-py<2.1,>=2.0', | ||||
|         'dm_control' | ||||
|     ], | ||||
| 
 | ||||
|     url='https://github.com/ALRhub/alr_envs/', | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user