diff --git a/README.md b/README.md
index ac012c4..5d267a3 100644
--- a/README.md
+++ b/README.md
@@ -107,7 +107,7 @@ keys `DMP` and `ProMP` that store a list of available environment names.
 import alr_envs
 print("Custom MP tasks:")
-print(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS)
+print(alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 print("OpenAI Gym MP tasks:")
 print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS)
@@ -116,7 +116,7 @@ print("Deepmind Control MP tasks:")
 print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 print("MetaWorld MP tasks:")
-print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS)
+print(alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 ```
 ### How to create a new MP task
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index e4a405d..d63a656 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -2,13 +2,13 @@ from alr_envs import dmc, meta, open_ai
 from alr_envs.utils.make_env_helpers import make, make_bb, make_rank
 # Convenience function for all MP environments
-from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS
+from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS
-from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS
+from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS
 ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
     key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] +
-         ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key]
-    for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()}
+         ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
+    for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
diff --git a/alr_envs/alr/__init__.py b/alr_envs/envs/__init__.py
similarity index 87%
rename from alr_envs/alr/__init__.py
rename to alr_envs/envs/__init__.py
index 3aea422..2f3b713 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/envs/__init__.py
@@ -16,7 +16,7 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER
 from .mujoco.reacher.reacher import ReacherEnv
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
+ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
 DEFAULT_BB_DICT_ProMP = {
     "name": 'EnvName',
@@ -63,7 +63,7 @@ DEFAULT_BB_DICT_DMP = {
 ## Simple Reacher
 register(
     id='SimpleReacher-v0',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 2,
     }
 )
 register(
     id='LongSimpleReacher-v0',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
     }
 )
@@ -83,7 +83,7 @@ register(
 register(
     id='ViaPointReacher-v0',
-    entry_point='alr_envs.alr.classic_control:ViaPointReacherEnv',
+    entry_point='alr_envs.envs.classic_control:ViaPointReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -95,7 +95,7 @@ register(
 ## Hole Reacher
 register(
     id='HoleReacher-v0',
-    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -115,7 +115,7 @@ register(
 for _dims in [5, 7]:
     register(
         id=f'Reacher{_dims}d-v0',
-        entry_point='alr_envs.alr.mujoco:ReacherEnv',
+        entry_point='alr_envs.envs.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "n_links": _dims,
@@ -124,7 +124,7 @@ for _dims in [5, 7]:
     register(
         id=f'Reacher{_dims}dSparse-v0',
-        entry_point='alr_envs.alr.mujoco:ReacherEnv',
+        entry_point='alr_envs.envs.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "sparse": True,
@@ -134,7 +134,7 @@ for _dims in [5, 7]:
 register(
     id='HopperJumpSparse-v0',
-    entry_point='alr_envs.alr.mujoco:HopperJumpEnv',
+    entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "sparse": True,
@@ -143,7 +143,7 @@ register(
 register(
     id='HopperJump-v0',
-    entry_point='alr_envs.alr.mujoco:HopperJumpEnv',
+    entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "sparse": False,
@@ -155,43 +155,43 @@ register(
 register(
     id='ALRAntJump-v0',
-    entry_point='alr_envs.alr.mujoco:AntJumpEnv',
+    entry_point='alr_envs.envs.mujoco:AntJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
 )
 register(
     id='ALRHalfCheetahJump-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
+    entry_point='alr_envs.envs.mujoco:ALRHalfCheetahJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
 )
 register(
     id='HopperJumpOnBox-v0',
-    entry_point='alr_envs.alr.mujoco:HopperJumpOnBoxEnv',
+    entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
 )
 register(
     id='ALRHopperThrow-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
+    entry_point='alr_envs.envs.mujoco:ALRHopperThrowEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
 )
 register(
     id='ALRHopperThrowInBasket-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
+    entry_point='alr_envs.envs.mujoco:ALRHopperThrowInBasketEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
 )
 register(
     id='ALRWalker2DJump-v0',
-    entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
+    entry_point='alr_envs.envs.mujoco:ALRWalker2dJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
 )
 register(
     id='BeerPong-v0',
-    entry_point='alr_envs.alr.mujoco:BeerPongEnv',
+    entry_point='alr_envs.envs.mujoco:BeerPongEnv',
     max_episode_steps=300,
 )
@@ -199,14 +199,14 @@ register(
 # only one time step, i.e. we simulate until the end of the episode
 register(
     id='BeerPongStepBased-v0',
-    entry_point='alr_envs.alr.mujoco:BeerPongEnvStepBasedEpisodicReward',
+    entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
     max_episode_steps=300,
 )
 # Beerpong with episodic reward, but fixed release time step
 register(
     id='BeerPongFixedRelease-v0',
-    entry_point='alr_envs.alr.mujoco:BeerPongEnvFixedReleaseStep',
+    entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
     max_episode_steps=300,
 )
@@ -229,7 +229,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_simple_reacher_dmp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
     kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@@ -242,7 +242,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_simple_reacher_promp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 # Viapoint reacher
 kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
@@ -257,7 +257,7 @@ register(
     # max_episode_steps=1,
     kwargs=kwargs_dict_via_point_reacher_dmp
 )
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
+ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
 kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
 kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
@@ -268,7 +268,7 @@ register(
     entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_via_point_reacher_promp
 )
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
+ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
 ## Hole Reacher
 _versions = ["HoleReacher-v0"]
@@ -288,7 +288,7 @@ for _v in _versions:
         # max_episode_steps=1,
         kwargs=kwargs_dict_hole_reacher_dmp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
     kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@@ -301,7 +301,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_hole_reacher_promp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ## ReacherNd
 _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
@@ -320,7 +320,7 @@ for _v in _versions:
         # max_episode_steps=1,
         kwargs=kwargs_dict_reacherNd_dmp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
     kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@@ -333,7 +333,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_alr_reacher_promp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ########################################################################################################################
 ## Beerpong ProMP
@@ -354,7 +354,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_bp_promp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ### BP with Fixed release
 _versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
@@ -374,7 +374,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_bp_promp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ########################################################################################################################
 ## Table Tennis needs to be fixed according to Zhou's implementation
@@ -395,7 +395,7 @@ for _v in _versions:
 #         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_ant_jump_promp
 #     )
-#     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+#     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 #
 # ########################################################################################################################
 #
@@ -412,7 +412,7 @@ for _v in _versions:
 #         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_halfcheetah_jump_promp
 #     )
-#     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+#     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 #
 # ########################################################################################################################
@@ -433,7 +433,7 @@ for _v in _versions:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_hopper_jump_promp
     )
-    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 # ########################################################################################################################
 #
@@ -451,13 +451,13 @@ for _v in _versions:
 #         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_walker2d_jump_promp
 #     )
-#     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+#     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 ### Deprecated, we will not provide non-random starts anymore
 """
 register(
     id='SimpleReacher-v1',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 2,
@@ -467,7 +467,7 @@ register(
 register(
     id='LongSimpleReacher-v1',
-    entry_point='alr_envs.alr.classic_control:SimpleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:SimpleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -476,7 +476,7 @@ register(
 )
 register(
     id='HoleReacher-v1',
-    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -491,7 +491,7 @@ register(
 )
 register(
     id='HoleReacher-v2',
-    entry_point='alr_envs.alr.classic_control:HoleReacherEnv',
+    entry_point='alr_envs.envs.classic_control:HoleReacherEnv',
     max_episode_steps=200,
     kwargs={
         "n_links": 5,
@@ -508,7 +508,7 @@ register(
 # CtxtFree are v0, Contextual are v1
 register(
     id='ALRAntJump-v0',
-    entry_point='alr_envs.alr.mujoco:AntJumpEnv',
+    entry_point='alr_envs.envs.mujoco:AntJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_ANTJUMP,
@@ -518,7 +518,7 @@ register(
 # CtxtFree are v0, Contextual are v1
 register(
     id='ALRHalfCheetahJump-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv',
+    entry_point='alr_envs.envs.mujoco:ALRHalfCheetahJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP,
@@ -527,7 +527,7 @@ register(
 )
 register(
     id='ALRHopperJump-v0',
-    entry_point='alr_envs.alr.mujoco:HopperJumpEnv',
+    entry_point='alr_envs.envs.mujoco:HopperJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP,
@@ -545,7 +545,7 @@ for i in _vs:
     _env_id = f'ALRReacher{i}-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.alr.mujoco:ReacherEnv',
+        entry_point='alr_envs.envs.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "steps_before_reward": 0,
@@ -558,7 +558,7 @@ for i in _vs:
     _env_id = f'ALRReacherSparse{i}-v0'
     register(
         id=_env_id,
-        entry_point='alr_envs.alr.mujoco:ReacherEnv',
+        entry_point='alr_envs.envs.mujoco:ReacherEnv',
         max_episode_steps=200,
         kwargs={
             "steps_before_reward": 200,
@@ -617,7 +617,7 @@ for i in _vs:
 register(
     id='ALRHopperJumpOnBox-v0',
-    entry_point='alr_envs.alr.mujoco:HopperJumpOnBoxEnv',
+    entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX,
@@ -626,7 +626,7 @@ for i in _vs:
 )
 register(
     id='ALRHopperThrow-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv',
+    entry_point='alr_envs.envs.mujoco:ALRHopperThrowEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW,
@@ -635,7 +635,7 @@ for i in _vs:
 )
 register(
     id='ALRHopperThrowInBasket-v0',
-    entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv',
+    entry_point='alr_envs.envs.mujoco:ALRHopperThrowInBasketEnv',
     max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET,
@@ -644,7 +644,7 @@ for i in _vs:
 )
 register(
     id='ALRWalker2DJump-v0',
-    entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv',
+    entry_point='alr_envs.envs.mujoco:ALRWalker2dJumpEnv',
     max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP,
     kwargs={
         "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP,
@@ -652,13 +652,13 @@ for i in _vs:
     }
 )
 register(id='TableTennis2DCtxt-v1',
-         entry_point='alr_envs.alr.mujoco:TTEnvGym',
+         entry_point='alr_envs.envs.mujoco:TTEnvGym',
          max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 2, 'fixed_goal': True})
 register(
     id='ALRBeerPong-v0',
-    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv',
+    entry_point='alr_envs.envs.mujoco:ALRBeerBongEnv',
     max_episode_steps=300,
     kwargs={
         "rndm_goal": False,
diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/envs/classic_control/README.MD
similarity index 100%
rename from alr_envs/alr/classic_control/README.MD
rename to alr_envs/envs/classic_control/README.MD
diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/envs/classic_control/__init__.py
similarity index 100%
rename from alr_envs/alr/classic_control/__init__.py
rename to alr_envs/envs/classic_control/__init__.py
diff --git a/alr_envs/alr/classic_control/base_reacher/__init__.py b/alr_envs/envs/classic_control/base_reacher/__init__.py
similarity index 100%
rename from alr_envs/alr/classic_control/base_reacher/__init__.py
rename to alr_envs/envs/classic_control/base_reacher/__init__.py
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/envs/classic_control/base_reacher/base_reacher.py
similarity index 98%
rename from alr_envs/alr/classic_control/base_reacher/base_reacher.py
rename to alr_envs/envs/classic_control/base_reacher/base_reacher.py
index 1af8187..f9186d8 100644
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py
+++ b/alr_envs/envs/classic_control/base_reacher/base_reacher.py
@@ -7,7 +7,7 @@ from gym import spaces
 from gym.core import ObsType
 from gym.utils import seeding
-from alr_envs.alr.classic_control.utils import intersect
+from alr_envs.envs.classic_control.utils import intersect
 class BaseReacherEnv(gym.Env, ABC):
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/envs/classic_control/base_reacher/base_reacher_direct.py
similarity index 93%
rename from alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py
rename to alr_envs/envs/classic_control/base_reacher/base_reacher_direct.py
index dc79827..05cff5b 100644
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py
+++ b/alr_envs/envs/classic_control/base_reacher/base_reacher_direct.py
@@ -2,7 +2,7 @@ from abc import ABC
 from gym import spaces
 import numpy as np
-from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv
+from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
 class BaseReacherDirectEnv(BaseReacherEnv, ABC):
diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/envs/classic_control/base_reacher/base_reacher_torque.py
similarity index 92%
rename from alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py
rename to alr_envs/envs/classic_control/base_reacher/base_reacher_torque.py
index 094f632..469d8a3 100644
--- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py
+++ b/alr_envs/envs/classic_control/base_reacher/base_reacher_torque.py
@@ -2,7 +2,7 @@ from abc import ABC
 from gym import spaces
 import numpy as np
-from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv
+from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv
 class BaseReacherTorqueEnv(BaseReacherEnv, ABC):
diff --git a/alr_envs/alr/classic_control/hole_reacher/__init__.py b/alr_envs/envs/classic_control/hole_reacher/__init__.py
similarity index 100%
rename from alr_envs/alr/classic_control/hole_reacher/__init__.py
rename to alr_envs/envs/classic_control/hole_reacher/__init__.py
diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/envs/classic_control/hole_reacher/hole_reacher.py
similarity index 95%
rename from alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
rename to alr_envs/envs/classic_control/hole_reacher/hole_reacher.py
index 8f0122f..0bd0e5c 100644
--- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py
+++ b/alr_envs/envs/classic_control/hole_reacher/hole_reacher.py
@@ -6,7 +6,7 @@ import numpy as np
 from gym.core import ObsType
 from matplotlib import patches
-from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
+from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
 class HoleReacherEnv(BaseReacherDirectEnv):
@@ -41,13 +41,13 @@ class HoleReacherEnv(BaseReacherDirectEnv):
         self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
         if rew_fct == "simple":
-            from alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
+            from alr_envs.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
         elif rew_fct == "vel_acc":
-            from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
+            from alr_envs.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty)
         elif rew_fct == "unbounded":
-            from alr_envs.alr.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
+            from alr_envs.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward
             self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision)
         else:
             raise ValueError("Unknown reward function {}".format(rew_fct))
diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/envs/classic_control/hole_reacher/hr_dist_vel_acc_reward.py
similarity index 100%
rename from alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py
rename to alr_envs/envs/classic_control/hole_reacher/hr_dist_vel_acc_reward.py
diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py b/alr_envs/envs/classic_control/hole_reacher/hr_simple_reward.py
similarity index 100%
rename from alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py
rename to alr_envs/envs/classic_control/hole_reacher/hr_simple_reward.py
diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py b/alr_envs/envs/classic_control/hole_reacher/hr_unbounded_reward.py
similarity index 100%
rename from alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py
rename to alr_envs/envs/classic_control/hole_reacher/hr_unbounded_reward.py
diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/envs/classic_control/hole_reacher/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py
rename to alr_envs/envs/classic_control/hole_reacher/mp_wrapper.py
diff --git a/alr_envs/alr/classic_control/simple_reacher/__init__.py b/alr_envs/envs/classic_control/simple_reacher/__init__.py
similarity index 100%
rename from alr_envs/alr/classic_control/simple_reacher/__init__.py
rename to alr_envs/envs/classic_control/simple_reacher/__init__.py
diff --git a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py b/alr_envs/envs/classic_control/simple_reacher/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py
rename to alr_envs/envs/classic_control/simple_reacher/mp_wrapper.py
diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/envs/classic_control/simple_reacher/simple_reacher.py
similarity index 97%
rename from alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
rename to alr_envs/envs/classic_control/simple_reacher/simple_reacher.py
index eb079d0..8c6f8d5 100644
--- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py
+++ b/alr_envs/envs/classic_control/simple_reacher/simple_reacher.py
@@ -5,7 +5,7 @@ import numpy as np
 from gym import spaces
 from gym.core import ObsType
-from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
+from alr_envs.envs.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv
 class SimpleReacherEnv(BaseReacherTorqueEnv):
diff --git a/alr_envs/alr/classic_control/utils.py b/alr_envs/envs/classic_control/utils.py
similarity index 100%
rename from alr_envs/alr/classic_control/utils.py
rename to alr_envs/envs/classic_control/utils.py
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py b/alr_envs/envs/classic_control/viapoint_reacher/__init__.py
similarity index 100%
rename from alr_envs/alr/classic_control/viapoint_reacher/__init__.py
rename to alr_envs/envs/classic_control/viapoint_reacher/__init__.py
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/envs/classic_control/viapoint_reacher/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py
rename to alr_envs/envs/classic_control/viapoint_reacher/mp_wrapper.py
diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/envs/classic_control/viapoint_reacher/viapoint_reacher.py
similarity index 98%
rename from alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
rename to alr_envs/envs/classic_control/viapoint_reacher/viapoint_reacher.py
index 569ca2c..9266721 100644
--- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py
+++ b/alr_envs/envs/classic_control/viapoint_reacher/viapoint_reacher.py
@@ -6,7 +6,7 @@ import numpy as np
 from gym.core import ObsType
 from gym.utils import seeding
-from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
+from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv
 class ViaPointReacherEnv(BaseReacherDirectEnv):
diff --git a/alr_envs/alr/mujoco/README.MD b/alr_envs/envs/mujoco/README.MD
similarity index 100%
rename from alr_envs/alr/mujoco/README.MD
rename to alr_envs/envs/mujoco/README.MD
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/envs/mujoco/__init__.py
similarity index 100%
rename from alr_envs/alr/mujoco/__init__.py
rename to alr_envs/envs/mujoco/__init__.py
diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/envs/mujoco/ant_jump/__init__.py
similarity index 100%
rename from alr_envs/alr/mujoco/ant_jump/__init__.py
rename to alr_envs/envs/mujoco/ant_jump/__init__.py
diff --git a/alr_envs/alr/mujoco/ant_jump/ant_jump.py b/alr_envs/envs/mujoco/ant_jump/ant_jump.py
similarity index 92%
rename from alr_envs/alr/mujoco/ant_jump/ant_jump.py
rename to alr_envs/envs/mujoco/ant_jump/ant_jump.py
index eddfbe0..74a66a3 100644
--- a/alr_envs/alr/mujoco/ant_jump/ant_jump.py
+++ b/alr_envs/envs/mujoco/ant_jump/ant_jump.py
@@ -55,7 +55,7 @@ class AntJumpEnv(AntEnv):
         costs = ctrl_cost + contact_cost
-        done = height < 0.3  # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
+        done = bool(height < 0.3)  # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle
         if self.current_step == MAX_EPISODE_STEPS_ANTJUMP or done:
             # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled
@@ -84,8 +84,8 @@ class AntJumpEnv(AntEnv):
               options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]:
         self.current_step = 0
         self.max_height = 0
-        self.goal = self.np_random.uniform(1.0, 2.5,
-                                           1)  # goal heights from 1.0 to 2.5; can be increased, but didn't work well with CMORE
+        # goal heights from 1.0 to 2.5; can be increased, but didn't work well with CMORE
+        self.goal = self.np_random.uniform(1.0, 2.5, 1)
         return super().reset()
     # reset_model had to be implemented in every env to make it deterministic
diff --git a/alr_envs/alr/mujoco/ant_jump/assets/ant.xml b/alr_envs/envs/mujoco/ant_jump/assets/ant.xml
similarity index 100%
rename from alr_envs/alr/mujoco/ant_jump/assets/ant.xml
rename to alr_envs/envs/mujoco/ant_jump/assets/ant.xml
diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
rename to alr_envs/envs/mujoco/ant_jump/mp_wrapper.py
diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/envs/mujoco/beerpong/__init__.py
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/__init__.py
rename to alr_envs/envs/mujoco/beerpong/__init__.py
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong.xml
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/beerpong.xml
rename to alr_envs/envs/mujoco/beerpong/assets/beerpong.xml
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup.xml
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml
rename to alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup.xml
diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
rename to alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml
diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl
rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl
diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl
rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl
diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl
rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl
diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl
similarity index 100%
rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl 
b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split10.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split10.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split10.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split10.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split11.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split11.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split11.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split11.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split12.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split12.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split12.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split12.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split13.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split13.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split13.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split13.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split14.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split14.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split14.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split14.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split15.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split15.stl similarity index 100% rename from 
alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split15.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split15.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split16.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split16.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split16.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split16.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split17.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split17.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split17.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split17.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split18.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split18.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split18.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split18.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split4.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split4.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split4.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split4.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split5.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split5.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split5.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split5.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split6.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split6.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split6.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split6.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split7.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split7.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split7.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split7.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split8.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split8.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split8.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split8.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split9.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split9.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split9.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split9.stl diff 
--git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl 
b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl 
b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/envs/mujoco/beerpong/beerpong.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/beerpong.py rename to alr_envs/envs/mujoco/beerpong/beerpong.py diff --git a/alr_envs/alr/mujoco/beerpong/deprecated/__init__.py b/alr_envs/envs/mujoco/beerpong/deprecated/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/deprecated/__init__.py rename to alr_envs/envs/mujoco/beerpong/deprecated/__init__.py diff --git a/alr_envs/alr/mujoco/beerpong/deprecated/beerpong.py b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py similarity index 98% rename from alr_envs/alr/mujoco/beerpong/deprecated/beerpong.py rename to alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py index 0fe7a42..cc9a9de 100644 --- a/alr_envs/alr/mujoco/beerpong/deprecated/beerpong.py +++ b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py @@ -5,7 +5,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv -from alr_envs.alr.mujoco.beerpong.deprecated.beerpong_reward_staged import BeerPongReward +from alr_envs.envs.mujoco.beerpong.deprecated.beerpong_reward_staged import BeerPongReward class BeerPongEnv(MujocoEnv, utils.EzPickle): diff --git a/alr_envs/alr/mujoco/beerpong/deprecated/beerpong_reward_staged.py b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong_reward_staged.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/deprecated/beerpong_reward_staged.py rename to alr_envs/envs/mujoco/beerpong/deprecated/beerpong_reward_staged.py diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py similarity index 100% rename 
from alr_envs/alr/mujoco/beerpong/mp_wrapper.py rename to alr_envs/envs/mujoco/beerpong/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py b/alr_envs/envs/mujoco/half_cheetah_jump/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/__init__.py rename to alr_envs/envs/mujoco/half_cheetah_jump/__init__.py diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml b/alr_envs/envs/mujoco/half_cheetah_jump/assets/cheetah.xml similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml rename to alr_envs/envs/mujoco/half_cheetah_jump/assets/cheetah.xml diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py rename to alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py rename to alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/envs/mujoco/hopper_jump/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/__init__.py rename to alr_envs/envs/mujoco/hopper_jump/__init__.py diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml b/alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml rename to alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump.xml diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml b/alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml rename to alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/hopper_jump.py rename to alr_envs/envs/mujoco/hopper_jump/hopper_jump.py diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py similarity index 98% rename from alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py rename to alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py index ac7e16b..845edaa 100644 --- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -134,7 +134,7 @@ class HopperJumpOnBoxEnv(HopperEnv): self.hopper_on_box = False if self.context: box_id = self.sim.model.body_name2id("box") - self.box_x = np.random.uniform(1, 3, 1) + self.box_x = self.np_random.uniform(1, 3, 1) self.sim.model.body_pos[box_id] = [self.box_x, 0, 0] return super().reset() diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py rename to alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/hopper_throw/__init__.py b/alr_envs/envs/mujoco/hopper_throw/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/__init__.py rename to alr_envs/envs/mujoco/hopper_throw/__init__.py diff --git 
a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml b/alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml rename to alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw.xml diff --git a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml b/alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml rename to alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py similarity index 95% rename from alr_envs/alr/mujoco/hopper_throw/hopper_throw.py rename to alr_envs/envs/mujoco/hopper_throw/hopper_throw.py index c2503c4..5630958 100644 --- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py +++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py @@ -1,4 +1,5 @@ import os +from typing import Optional from gym.envs.mujoco.hopper_v3 import HopperEnv import numpy as np @@ -47,7 +48,7 @@ class ALRHopperThrowEnv(HopperEnv): ball_pos_after_y = self.get_body_com("ball")[2] # done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc - ball_landed = self.get_body_com("ball")[2] <= 0.05 + ball_landed = bool(self.get_body_com("ball")[2] <= 0.05) done = ball_landed ctrl_cost = self.control_cost(action) @@ -76,7 +77,7 @@ class ALRHopperThrowEnv(HopperEnv): def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self): + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): self.current_step = 0 self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0 return super().reset() diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py similarity index 94% rename from alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py rename to alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 6827bf8..7ea9675 100644 --- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -1,4 +1,6 @@ import os +from typing import Optional + from gym.envs.mujoco.hopper_v3 import HopperEnv import numpy as np @@ -57,13 +59,14 @@ class ALRHopperThrowInBasketEnv(HopperEnv): is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size / 2) and ball_pos[1] <= basket_pos[1] + ( - self.basket_size / 2) + self.basket_size / 2) is_in_basket_z = ball_pos[2] < 0.1 is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z - if is_in_basket: self.ball_in_basket = True + if is_in_basket: + self.ball_in_basket = True ball_landed = self.get_body_com("ball")[2] <= 0.05 - done = ball_landed or is_in_basket + done = bool(ball_landed or is_in_basket) rewards = 0 @@ -98,7 +101,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv): def _get_obs(self): return np.append(super()._get_obs(), self.basket_x) - def reset(self): + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): if self.max_episode_steps == 10: # We have to initialize this here, because the spec is only added after creating 
diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py
rename to alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py
diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/envs/mujoco/reacher/__init__.py
similarity index 100%
rename from alr_envs/alr/mujoco/reacher/__init__.py
rename to alr_envs/envs/mujoco/reacher/__init__.py
diff --git a/alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml b/alr_envs/envs/mujoco/reacher/assets/reacher_5links.xml
similarity index 100%
rename from alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml
rename to alr_envs/envs/mujoco/reacher/assets/reacher_5links.xml
diff --git a/alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml b/alr_envs/envs/mujoco/reacher/assets/reacher_7links.xml
similarity index 100%
rename from alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml
rename to alr_envs/envs/mujoco/reacher/assets/reacher_7links.xml
diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/envs/mujoco/reacher/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/mujoco/reacher/mp_wrapper.py
rename to alr_envs/envs/mujoco/reacher/mp_wrapper.py
diff --git a/alr_envs/alr/mujoco/reacher/reacher.py b/alr_envs/envs/mujoco/reacher/reacher.py
similarity index 100%
rename from alr_envs/alr/mujoco/reacher/reacher.py
rename to alr_envs/envs/mujoco/reacher/reacher.py
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/__init__.py b/alr_envs/envs/mujoco/walker_2d_jump/__init__.py
similarity index 100%
rename from alr_envs/alr/mujoco/walker_2d_jump/__init__.py
rename to alr_envs/envs/mujoco/walker_2d_jump/__init__.py
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml b/alr_envs/envs/mujoco/walker_2d_jump/assets/walker2d.xml
similarity index 100%
rename from alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml
rename to alr_envs/envs/mujoco/walker_2d_jump/assets/walker2d.xml
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
similarity index 100%
rename from alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
rename to alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py
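The walker diff below also replaces the module-level `np.random` with the per-environment `self.np_random` when sampling the goal. That swap is what makes per-instance seeding possible; a small sketch of the difference, assuming gym's `seeding` helper:

```python
import numpy as np
from gym.utils import seeding

# Two per-env streams seeded identically stay in lockstep, regardless of
# what other code draws from the global np.random state in between.
rng_a, _ = seeding.np_random(42)
rng_b, _ = seeding.np_random(42)

np.random.uniform()  # unrelated global draw; rng_a/rng_b are unaffected

goal_a = rng_a.uniform(1.5, 2.5, 1)
goal_b = rng_b.uniform(1.5, 2.5, 1)
assert np.array_equal(goal_a, goal_b)
```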
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
similarity index 86%
rename from alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
rename to alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
index 1ab0d29..5b143bc 100644
--- a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py
+++ b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py
@@ -1,9 +1,12 @@
 import os
+from typing import Optional
+
 from gym.envs.mujoco.walker2d_v3 import Walker2dEnv
 import numpy as np
 
 MAX_EPISODE_STEPS_WALKERJUMP = 300
 
+
 # TODO: Right now this environment only considers jumping to a specific height, which is not nice. It should be extended
 # to the same structure as the Hopper, where the angles are randomized (-> contexts) and the agent should jump as high
 # as possible, while landing at a specific target position
@@ -36,16 +39,16 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
         super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy,
                          healthy_z_range, healthy_angle_range, reset_noise_scale,
                          exclude_current_positions_from_observation)
-    
+
     def step(self, action):
         self.current_step += 1
         self.do_simulation(action, self.frame_skip)
-        #pos_after = self.get_body_com("torso")[0]
+        # pos_after = self.get_body_com("torso")[0]
         height = self.get_body_com("torso")[2]
 
         self.max_height = max(height, self.max_height)
 
-        done = height < 0.2
+        done = bool(height < 0.2)
 
         ctrl_cost = self.control_cost(action)
         costs = ctrl_cost
@@ -70,10 +73,10 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
     def _get_obs(self):
         return np.append(super()._get_obs(), self.goal)
 
-    def reset(self):
+    def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None):
         self.current_step = 0
         self.max_height = 0
-        self.goal = np.random.uniform(1.5, 2.5, 1)  # 1.5 3.0
+        self.goal = self.np_random.uniform(1.5, 2.5, 1)  # 1.5 3.0
         return super().reset()
 
     # overwrite reset_model to make it deterministic
@@ -81,14 +84,15 @@ class ALRWalker2dJumpEnv(Walker2dEnv):
         noise_low = -self._reset_noise_scale
         noise_high = self._reset_noise_scale
 
-        qpos = self.init_qpos #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
-        qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
+        qpos = self.init_qpos  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
+        qvel = self.init_qvel  # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
 
         self.set_state(qpos, qvel)
 
         observation = self._get_obs()
         return observation
 
+
 if __name__ == '__main__':
     render_mode = "human"  # "human" or "partial" or "final"
     env = ALRWalker2dJumpEnv()
diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py
index 27cf8f8..79161d4 100644
--- a/alr_envs/examples/pd_control_gain_tuning.py
+++ b/alr_envs/examples/pd_control_gain_tuning.py
@@ -2,7 +2,7 @@ import numpy as np
 from matplotlib import pyplot as plt
 
 from alr_envs import dmc, meta
-from alr_envs.alr import mujoco
+from alr_envs.envs import mujoco
 from alr_envs.utils.make_env_helpers import make_promp_env
diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py
index 6ccd622..fcc87cc 100644
--- a/alr_envs/meta/__init__.py
+++ b/alr_envs/meta/__init__.py
@@ -5,7 +5,7 @@ from gym import register
 from . import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \
     object_change_mp_wrapper
 
-ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
+ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
 
 # MetaWorld
 
@@ -43,7 +43,7 @@ for _task in _goal_change_envs:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_goal_change_promp
     )
-    ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 _object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"]
 for _task in _object_change_envs:
@@ -58,7 +58,7 @@ for _task in _object_change_envs:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_object_change_promp
     )
-    ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 _goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2",
                                 "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2",
@@ -84,7 +84,7 @@ for _task in _goal_and_object_change_envs:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_goal_and_object_change_promp
     )
-    ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 _goal_and_endeffector_change_envs = ["basketball-v2"]
 for _task in _goal_and_endeffector_change_envs:
@@ -100,4 +100,4 @@ for _task in _goal_and_endeffector_change_envs:
         entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_goal_and_endeffector_change_promp
     )
-    ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
diff --git a/setup.py b/setup.py
index 3b78401..02dc453 100644
--- a/setup.py
+++ b/setup.py
@@ -31,10 +31,10 @@ setup(
         "mujoco_py<2.2,>=2.1",
     ],
     packages=[package for package in find_packages() if package.startswith("alr_envs")],
-    # packages=['alr_envs', 'alr_envs.alr', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'],
+    # packages=['alr_envs', 'alr_envs.envs', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'],
     package_data={
         "alr_envs": [
-            "alr/mujoco/*/assets/*.xml",
+            "envs/mujoco/*/assets/*.xml",
         ]
     },
     python_requires=">=3.6",
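Non-code files silently disappear from wheels when a `package_data` glob goes stale, so a quick sanity check after the rename is cheap insurance. A sketch, assuming the package is importable from this revision:

```python
from pathlib import Path

import alr_envs

# The glob mirrors the package_data entry above; if the rename missed a
# directory, this finds nothing and the assert fires.
pkg_root = Path(alr_envs.__file__).parent
xmls = list(pkg_root.glob("envs/mujoco/*/assets/*.xml"))
assert xmls, "package_data glob 'envs/mujoco/*/assets/*.xml' matched no files"
print(f"{len(xmls)} MuJoCo asset files will be packaged")
```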
diff --git a/test/test_bb_envs.py b/test/test_bb_envs.py
deleted file mode 100644
index 49eb31e..0000000
--- a/test/test_bb_envs.py
+++ /dev/null
@@ -1,168 +0,0 @@
-import unittest
-
-import gym
-import numpy as np
-
-import alr_envs  # noqa
-from alr_envs.utils.make_env_helpers import make
-
-ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point)
-SEED = 1
-
-
-class TestMPEnvironments(unittest.TestCase):
-
-    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
-        """
-        Example for running a DMC based env in the step based setting.
-        The env_id has to be specified as `domain_name-task_name` or
-        for manipulation tasks as `manipulation-environment_name`
-
-        Args:
-            env_id: Either `domain_name-task_name` or `manipulation-environment_name`
-            iterations: Number of rollout steps to run
-            seed= random seeding
-            render: Render the episode
-
-        Returns:
-
-        """
-        env: gym.Env = make(env_id, seed=seed)
-        rewards = []
-        observations = []
-        dones = []
-        obs = env.reset()
-        self._verify_observations(obs, env.observation_space, "reset()")
-
-        iterations = iterations or (env.spec.max_episode_steps or 1)
-
-        # number of samples(multiple environment steps)
-        for i in range(iterations):
-            observations.append(obs)
-
-            actions = env.action_space.sample()
-            # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
-            obs, reward, done, info = env.step(actions)
-
-            self._verify_observations(obs, env.observation_space, "step()")
-            self._verify_reward(reward)
-            self._verify_done(done)
-
-            rewards.append(reward)
-            dones.append(done)
-
-            if render:
-                env.render("human")
-
-            if done:
-                break
-
-        assert done, "Done flag is not True after end of episode."
-        observations.append(obs)
-        env.close()
-        del env
-        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
-
-    def _run_env_determinism(self, ids):
-        seed = 0
-        for env_id in ids:
-            with self.subTest(msg=env_id):
-                traj1 = self._run_env(env_id, seed=seed)
-                traj2 = self._run_env(env_id, seed=seed)
-                for i, time_step in enumerate(zip(*traj1, *traj2)):
-                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
-                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
-                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
-                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
-                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
-
-    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
-        self.assertTrue(observation_space.contains(obs),
-                        f"Observation {obs} received from {obs_type} "
-                        f"not contained in observation space {observation_space}.")
-
-    def _verify_reward(self, reward):
-        self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
-
-    def _verify_done(self, done):
-        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
-
-    def test_alr_environment_functionality(self):
-        """Tests that environments runs without errors using random actions for ALR MP envs."""
-        with self.subTest(msg="DMP"):
-            for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-        with self.subTest(msg="ProMP"):
-            for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-    def test_openai_environment_functionality(self):
-        """Tests that environments runs without errors using random actions for OpenAI gym MP envs."""
-        with self.subTest(msg="DMP"):
-            for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-        with self.subTest(msg="ProMP"):
-            for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-    def test_dmc_environment_functionality(self):
-        """Tests that environments runs without errors using random actions for DMC MP envs."""
-        with self.subTest(msg="DMP"):
-            for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-        with self.subTest(msg="ProMP"):
-            for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-    def test_metaworld_environment_functionality(self):
-        """Tests that environments runs without errors using random actions for Metaworld MP envs."""
-        with self.subTest(msg="DMP"):
-            for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-        with self.subTest(msg="ProMP"):
-            for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']:
-                with self.subTest(msg=env_id):
-                    self._run_env(env_id)
-
-    def test_alr_environment_determinism(self):
-        """Tests that identical seeds produce identical trajectories for ALR MP Envs."""
-        with self.subTest(msg="DMP"):
-            self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"])
-        with self.subTest(msg="ProMP"):
-            self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"])
-
-    def test_openai_environment_determinism(self):
-        """Tests that identical seeds produce identical trajectories for OpenAI gym MP Envs."""
-        with self.subTest(msg="DMP"):
-            self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"])
-        with self.subTest(msg="ProMP"):
-            self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"])
-
-    def test_dmc_environment_determinism(self):
-        """Tests that identical seeds produce identical trajectories for DMC MP Envs."""
-        with self.subTest(msg="DMP"):
-            self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"])
-        with self.subTest(msg="ProMP"):
-            self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])
-
-    def test_metaworld_environment_determinism(self):
-        """Tests that identical seeds produce identical trajectories for Metaworld MP Envs."""
-        with self.subTest(msg="DMP"):
-            self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"])
-        with self.subTest(msg="ProMP"):
-            self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"])
-
-
-if __name__ == '__main__':
-    unittest.main()
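The new test_custom.py below splits the registry by entry point: step-based envs have their own entry points, while every black-box variant routes through `make_bb_env_helper`. The same filter can be used interactively (this assumes string entry points, as the test itself does):

```python
import gym

import alr_envs  # noqa: importing registers all environments

custom_ids = [spec.id for spec in gym.envs.registry.all()
              if "alr_envs" in spec.entry_point
              and 'make_bb_env_helper' not in spec.entry_point]
bb_ids = [spec.id for spec in gym.envs.registry.all()
          if "alr_envs" in spec.entry_point
          and 'make_bb_env_helper' in spec.entry_point]
print(f"{len(custom_ids)} step-based ids, {len(bb_ids)} black-box ids")
```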
diff --git a/test/test_custom.py b/test/test_custom.py
new file mode 100644
index 0000000..37bc48d
--- /dev/null
+++ b/test/test_custom.py
@@ -0,0 +1,118 @@
+import unittest
+
+import gym
+import numpy as np
+
+import alr_envs  # noqa
+from alr_envs.utils.make_env_helpers import make
+
+CUSTOM_IDS = [spec.id for spec in gym.envs.registry.all() if
+              "alr_envs" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point]
+SEED = 1
+
+
+class TestCustomEnvironments(unittest.TestCase):
+
+    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
+        """
+        Example for running a custom env in the step based setting.
+        The env_id has to be specified as `env_id-vX`.
+
+        Args:
+            env_id: env id in the form `env_id-vX`
+            iterations: Number of rollout steps to run
+            seed: random seeding
+            render: Render the episode
+
+        Returns: observations, rewards, dones, actions
+
+        """
+        env: gym.Env = make(env_id, seed=seed)
+        rewards = []
+        actions = []
+        observations = []
+        dones = []
+        obs = env.reset()
+        self._verify_observations(obs, env.observation_space, "reset()")
+
+        iterations = iterations or (env.spec.max_episode_steps or 1)
+
+        # number of samples (multiple environment steps)
+        for i in range(iterations):
+            observations.append(obs)
+
+            ac = env.action_space.sample()
+            actions.append(ac)
+            obs, reward, done, info = env.step(ac)
+
+            self._verify_observations(obs, env.observation_space, "step()")
+            self._verify_reward(reward)
+            self._verify_done(done)
+
+            rewards.append(reward)
+            dones.append(done)
+
+            if render:
+                env.render("human")
+
+            if done:
+                break
+
+        assert done, "Done flag is not True after end of episode."
+        observations.append(obs)
+        env.close()
+        del env
+        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
+
+    def _run_env_determinism(self, ids):
+        seed = 0
+        for env_id in ids:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
+                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+
+    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
+        self.assertTrue(observation_space.contains(obs),
+                        f"Observation {obs} received from {obs_type} "
+                        f"not contained in observation space {observation_space}.")
+
+    def _verify_reward(self, reward):
+        self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
+
+    def _verify_done(self, done):
+        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
+
+    def test_step_functionality(self):
+        """Tests that step environments run without errors using random actions."""
+        for env_id in CUSTOM_IDS:
+            with self.subTest(msg=env_id):
+                self._run_env(env_id)
+
+    def test_step_determinism(self):
+        """Tests that for step environments identical seeds produce identical trajectories."""
+        self._run_env_determinism(CUSTOM_IDS)
+
+    def test_bb_functionality(self):
+        """Tests that black box environments run without errors using random actions."""
+        for traj_gen, env_ids in alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                for id in env_ids:
+                    with self.subTest(msg=id):
+                        self._run_env(id)
+
+    def test_bb_determinism(self):
+        """Tests that for black box environment identical seeds produce identical trajectories."""
+        for traj_gen, env_ids in alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                self._run_env_determinism(env_ids)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_dmc_envs.py b/test/test_dmc.py
similarity index 63%
rename from test/test_dmc_envs.py
rename to test/test_dmc.py
index a90814d..4a67ecd 100644
--- a/test/test_dmc_envs.py
+++ b/test/test_dmc.py
@@ -5,31 +5,31 @@ import numpy as np
 
 from dm_control import suite, manipulation
 
+import alr_envs
 from alr_envs import make
 
-DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
-MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
+SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
+MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')]
 SEED = 1
 
 
-class TestStepDMCEnvironments(unittest.TestCase):
+class TestDMCEnvironments(unittest.TestCase):
 
     def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
        """
        Example for running a DMC based env in the step based setting.
-       The env_id has to be specified as `domain_name-task_name` or
+       The env_id has to be specified as `dmc:domain_name-task_name` or
        for manipulation tasks as `manipulation-environment_name`
 
        Args:
-           env_id: Either `domain_name-task_name` or `manipulation-environment_name`
+           env_id: Either `dmc:domain_name-task_name` or `dmc:manipulation-environment_name`
            iterations: Number of rollout steps to run
-           seed= random seeding
+           seed: random seeding
            render: Render the episode
 
-       Returns:
+       Returns: observations, rewards, dones, actions
 
        """
-       print(env_id)
        env: gym.Env = make(env_id, seed=seed)
        rewards = []
        observations = []
@@ -68,6 +68,19 @@ class TestStepDMCEnvironments(unittest.TestCase):
        del env
        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
 
+    def _run_env_determinism(self, ids):
+        seed = 0
+        for env_id in ids:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
+                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+
     def _verify_observations(self, obs, observation_space, obs_type="reset()"):
         self.assertTrue(observation_space.contains(obs),
                         f"Observation {obs} received from {obs_type} "
@@ -79,47 +92,39 @@ class TestStepDMCEnvironments(unittest.TestCase):
     def _verify_done(self, done):
         self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
 
-    def test_dmc_functionality(self):
-        """Tests that environments runs without errors using random actions."""
-        for env_id in DMC_ENVS:
+    def test_suite_functionality(self):
+        """Tests that suite step environments run without errors using random actions."""
+        for env_id in SUITE_IDS:
             with self.subTest(msg=env_id):
                 self._run_env(env_id)
 
-    def test_dmc_determinism(self):
-        """Tests that identical seeds produce identical trajectories."""
-        seed = 0
-        # Iterate over two trajectories, which should have the same state and action sequence
-        for env_id in DMC_ENVS:
-            with self.subTest(msg=env_id):
-                traj1 = self._run_env(env_id, seed=seed)
-                traj2 = self._run_env(env_id, seed=seed)
-                for i, time_step in enumerate(zip(*traj1, *traj2)):
-                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
-                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
-                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
-                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
-                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+    def test_suite_determinism(self):
+        """Tests that for step environments identical seeds produce identical trajectories."""
+        self._run_env_determinism(SUITE_IDS)
 
     def test_manipulation_functionality(self):
-        """Tests that environments runs without errors using random actions."""
-        for env_id in MANIPULATION_SPECS:
+        """Tests that manipulation step environments run without errors using random actions."""
+        for env_id in MANIPULATION_IDS:
             with self.subTest(msg=env_id):
                 self._run_env(env_id)
 
     def test_manipulation_determinism(self):
-        """Tests that identical seeds produce identical trajectories."""
-        seed = 0
-        # Iterate over two trajectories, which should have the same state and action sequence
-        for env_id in MANIPULATION_SPECS:
-            with self.subTest(msg=env_id):
-                traj1 = self._run_env(env_id, seed=seed)
-                traj2 = self._run_env(env_id, seed=seed)
-                for i, time_step in enumerate(zip(*traj1, *traj2)):
-                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
-                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
-                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
-                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
-                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+        """Tests that for step environments identical seeds produce identical trajectories."""
+        self._run_env_determinism(MANIPULATION_IDS)
+
+    def test_bb_functionality(self):
+        """Tests that black box environments run without errors using random actions."""
+        for traj_gen, env_ids in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                for id in env_ids:
+                    with self.subTest(msg=id):
+                        self._run_env(id)
+
+    def test_bb_determinism(self):
+        """Tests that for black box environment identical seeds produce identical trajectories."""
+        for traj_gen, env_ids in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                self._run_env_determinism(env_ids)
 
 
 if __name__ == '__main__':
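The renamed test_dmc.py derives its id lists straight from dm_control's registries, so newly added suite tasks are covered automatically. A minimal sketch of the same construction outside the test, assuming dm_control is installed:

```python
from dm_control import suite, manipulation

from alr_envs import make

# Same comprehensions as in test_dmc.py above.
suite_ids = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"]
manipulation_ids = [f'dmc:manipulation-{task}' for task in manipulation.ALL
                    if task.endswith('_features')]

env = make(suite_ids[0], seed=1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()
```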
diff --git a/test/test_metaworld_envs.py b/test/test_gym.py
similarity index 68%
rename from test/test_metaworld_envs.py
rename to test/test_gym.py
index ac2a013..f264c49 100644
--- a/test/test_metaworld_envs.py
+++ b/test/test_gym.py
@@ -3,25 +3,24 @@
 import gym
 import numpy as np
 
+import alr_envs
 from alr_envs import make
-from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
 
-ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
+GYM_IDS = []  # step-based gym ids to smoke-test; none listed yet
 SEED = 1
 
 
-class TestStepMetaWorlEnvironments(unittest.TestCase):
+class TestGymEnvironments(unittest.TestCase):
 
     def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
         """
-        Example for running a DMC based env in the step based setting.
-        The env_id has to be specified as `domain_name-task_name` or
-        for manipulation tasks as `manipulation-environment_name`
+        Example for running an OpenAI gym env in the step based setting.
+        The env_id has to be specified as `env_id-vX`.
 
         Args:
-            env_id: Either `domain_name-task_name` or `manipulation-environment_name`
+            env_id: env id in the form `env_id-vX`
             iterations: Number of rollout steps to run
-            seed= random seeding
+            seed: random seeding
             render: Render the episode
 
         Returns:
 
@@ -65,6 +64,19 @@
         del env
         return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
 
+    def _run_env_determinism(self, ids):
+        seed = 0
+        for env_id in ids:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
+                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+
     def _verify_observations(self, obs, observation_space, obs_type="reset()"):
         self.assertTrue(observation_space.contains(obs),
                         f"Observation {obs} received from {obs_type} "
@@ -76,26 +88,29 @@
     def _verify_done(self, done):
         self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
 
-    def test_metaworld_functionality(self):
-        """Tests that environments runs without errors using random actions."""
-        for env_id in ALL_ENVS:
+    def test_step_functionality(self):
+        """Tests that step environments run without errors using random actions."""
+        for env_id in GYM_IDS:
             with self.subTest(msg=env_id):
                 self._run_env(env_id)
 
-    def test_metaworld_determinism(self):
-        """Tests that identical seeds produce identical trajectories."""
-        seed = 0
-        # Iterate over two trajectories, which should have the same state and action sequence
-        for env_id in ALL_ENVS:
-            with self.subTest(msg=env_id):
-                traj1 = self._run_env(env_id, seed=seed)
-                traj2 = self._run_env(env_id, seed=seed)
-                for i, time_step in enumerate(zip(*traj1, *traj2)):
-                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
-                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
-                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
-                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
-                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+    def test_step_determinism(self):
+        """Tests that for step environments identical seeds produce identical trajectories."""
+        self._run_env_determinism(GYM_IDS)
+
+    def test_bb_functionality(self):
+        """Tests that black box environments run without errors using random actions."""
+        for traj_gen, env_ids in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                for id in env_ids:
+                    with self.subTest(msg=id):
+                        self._run_env(id)
+
+    def test_bb_determinism(self):
+        """Tests that for black box environment identical seeds produce identical trajectories."""
+        for traj_gen, env_ids in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                self._run_env_determinism(env_ids)
 
 
 if __name__ == '__main__':
diff --git a/test/test_metaworld.py b/test/test_metaworld.py
new file mode 100644
index 0000000..2f7af22
--- /dev/null
+++ b/test/test_metaworld.py
@@ -0,0 +1,119 @@
+import unittest
+
+import gym
+import numpy as np
+
+import alr_envs
+from alr_envs import make
+from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
+
+METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in
+                 ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()]
+SEED = 1
+
+
+class TestMetaWorldEnvironments(unittest.TestCase):
+
+    def _run_env(self, env_id, iterations=None, seed=SEED, render=False):
+        """
+        Example for running a metaworld based env in the step based setting.
+        The env_id has to be specified as `metaworld:env_id-vX`.
+
+        Args:
+            env_id: env id in the form `metaworld:env_id-vX`
+            iterations: Number of rollout steps to run
+            seed: random seeding
+            render: Render the episode
+
+        Returns:
+
+        """
+        env: gym.Env = make(env_id, seed=seed)
+        rewards = []
+        observations = []
+        actions = []
+        dones = []
+        obs = env.reset()
+        self._verify_observations(obs, env.observation_space, "reset()")
+
+        iterations = iterations or (env.spec.max_episode_steps or 1)
+
+        # number of samples (multiple environment steps)
+        for i in range(iterations):
+            observations.append(obs)
+
+            ac = env.action_space.sample()
+            actions.append(ac)
+            # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape)
+            obs, reward, done, info = env.step(ac)
+
+            self._verify_observations(obs, env.observation_space, "step()")
+            self._verify_reward(reward)
+            self._verify_done(done)
+
+            rewards.append(reward)
+            dones.append(done)
+
+            if render:
+                env.render("human")
+
+            if done:
+                break
+
+        assert done, "Done flag is not True after end of episode."
+        observations.append(obs)
+        env.close()
+        del env
+        return np.array(observations), np.array(rewards), np.array(dones), np.array(actions)
+
+    def _run_env_determinism(self, ids):
+        seed = 0
+        for env_id in ids:
+            with self.subTest(msg=env_id):
+                traj1 = self._run_env(env_id, seed=seed)
+                traj2 = self._run_env(env_id, seed=seed)
+                for i, time_step in enumerate(zip(*traj1, *traj2)):
+                    obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step
+                    self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.")
+                    self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.")
+                    self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.")
+                    self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.")
+
+    def _verify_observations(self, obs, observation_space, obs_type="reset()"):
+        self.assertTrue(observation_space.contains(obs),
+                        f"Observation {obs} received from {obs_type} "
+                        f"not contained in observation space {observation_space}.")
+
+    def _verify_reward(self, reward):
+        self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.")
+
+    def _verify_done(self, done):
+        self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.")
+
+    def test_step_functionality(self):
+        """Tests that step environments run without errors using random actions."""
+        for env_id in METAWORLD_IDS:
+            with self.subTest(msg=env_id):
+                self._run_env(env_id)
+
+    def test_step_determinism(self):
+        """Tests that for step environments identical seeds produce identical trajectories."""
+        self._run_env_determinism(METAWORLD_IDS)
+
+    def test_bb_functionality(self):
+        """Tests that black box environments run without errors using random actions."""
+        for traj_gen, env_ids in alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                for id in env_ids:
+                    with self.subTest(msg=id):
+                        self._run_env(id)
+
+    def test_bb_determinism(self):
+        """Tests that for black box environment identical seeds produce identical trajectories."""
+        for traj_gen, env_ids in alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+            with self.subTest(msg=traj_gen):
+                self._run_env_determinism(env_ids)
+
+
+if __name__ == '__main__':
+    unittest.main()
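Taken together, all four suites share the same determinism contract: two rollouts from the same seed must match step for step. A standalone probe of that contract (the helper is hypothetical; it assumes the sampled env terminates via its registered step limit):

```python
import gym
import numpy as np

import alr_envs  # noqa: importing registers all environments
from alr_envs.utils.make_env_helpers import make


def rollout(env_id: str, seed: int):
    """Collect one seeded episode of observations and actions."""
    env: gym.Env = make(env_id, seed=seed)
    obs, observations, actions = env.reset(), [], []
    done = False
    while not done:
        ac = env.action_space.sample()
        actions.append(ac)
        obs, reward, done, info = env.step(ac)
        observations.append(obs)
    env.close()
    return np.array(observations), np.array(actions)


# Any step-based id registered by the package (same filter as test_custom.py).
env_id = next(spec.id for spec in gym.envs.registry.all()
              if "alr_envs" in spec.entry_point
              and 'make_bb_env_helper' not in spec.entry_point)

obs1, ac1 = rollout(env_id, seed=1)
obs2, ac2 = rollout(env_id, seed=1)
assert np.array_equal(obs1, obs2) and np.array_equal(ac1, ac2)
```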