diff --git a/README.md b/README.md
index 85249fb..086c447 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-## Fancy Gym
+# Fancy Gym
 
 Fancy gym offers a large variety of reinforcement learning environments under the unifying interface of [OpenAI gym](https://gym.openai.com/).
 We provide support (under the OpenAI interface) for the benchmark suites
@@ -65,7 +65,7 @@ We prepared [multiple examples](fancy_gym/examples/), please have a look there f
 ```python
 import fancy_gym
 
-env = fancy_gym.make('HoleReacher-v0', seed=1)
+env = fancy_gym.make('Reacher5d-v0', seed=1)
 state = env.reset()
 
 for i in range(1000):
@@ -106,7 +106,7 @@ keys `DMP` and `ProMP` that store a list of available environment names.
 import fancy_gym
 
 print("Custom MP tasks:")
-print(fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
+print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
 
 print("OpenAI Gym MP tasks:")
 print(fancy_gym.ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS)
diff --git a/fancy_gym/__init__.py b/fancy_gym/__init__.py
index 457e3b1..f6f690a 100644
--- a/fancy_gym/__init__.py
+++ b/fancy_gym/__init__.py
@@ -2,7 +2,7 @@ from fancy_gym import dmc, meta, open_ai
 from fancy_gym.utils.make_env_helpers import make, make_bb, make_rank
 
 from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS  # Convenience function for all MP environments
-from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS
+from .envs import ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 from .open_ai import ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS
 
@@ -10,4 +10,4 @@ ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {
     key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] +
          ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key]
-    for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
+    for key, value in ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()}
 
diff --git a/fancy_gym/envs/__init__.py b/fancy_gym/envs/__init__.py
index f75d884..f6c7bc7 100644
--- a/fancy_gym/envs/__init__.py
+++ b/fancy_gym/envs/__init__.py
@@ -17,7 +17,7 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER
 from .mujoco.reacher.reacher import ReacherEnv, MAX_EPISODE_STEPS_REACHER
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
 
-ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
+ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []}
 
 DEFAULT_BB_DICT_ProMP = {
     "name": 'EnvName',
@@ -231,7 +231,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_simple_reacher_dmp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
     kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@@ -244,7 +244,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_simple_reacher_promp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 # Viapoint reacher
 kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP)
@@ -259,7 +259,7 @@ register(
     # max_episode_steps=1,
     kwargs=kwargs_dict_via_point_reacher_dmp
 )
-ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
+ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0")
 
 kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
 kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper)
@@ -270,7 +270,7 @@ register(
     entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
     kwargs=kwargs_dict_via_point_reacher_promp
 )
-ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
+ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0")
 
 ## Hole Reacher
 _versions = ["HoleReacher-v0"]
@@ -290,7 +290,7 @@ for _v in _versions:
         # max_episode_steps=1,
         kwargs=kwargs_dict_hole_reacher_dmp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
     kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@@ -303,7 +303,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_hole_reacher_promp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ## ReacherNd
 _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"]
@@ -320,7 +320,7 @@ for _v in _versions:
         # max_episode_steps=1,
         kwargs=kwargs_dict_reacher_dmp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id)
 
     _env_id = f'{_name[0]}ProMP-{_name[1]}'
     kwargs_dict_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP)
@@ -331,7 +331,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_reacher_promp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ########################################################################################################################
 ## Beerpong ProMP
@@ -352,7 +352,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_bp_promp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ### BP with Fixed release
 _versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"]
@@ -372,7 +372,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_bp_promp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ########################################################################################################################
 ## Table Tennis needs to be fixed according to Zhou's implementation
@@ -393,7 +393,7 @@ for _v in _versions:
 #         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_ant_jump_promp
 #     )
-#     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+#     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 #
 # ########################################################################################################################
 #
@@ -410,7 +410,7 @@ for _v in _versions:
 #         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_halfcheetah_jump_promp
 #     )
-#     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+#     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 #
 #
 ########################################################################################################################
@@ -431,7 +431,7 @@ for _v in _versions:
         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
         kwargs=kwargs_dict_hopper_jump_promp
     )
-    ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+    ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 # ########################################################################################################################
 #
@@ -449,7 +449,7 @@ for _v in _versions:
 #         entry_point='fancy_gym.utils.make_env_helpers:make_bb_env_helper',
 #         kwargs=kwargs_dict_walker2d_jump_promp
 #     )
-#     ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+#     ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ### Depricated, we will not provide non random starts anymore
 """
diff --git a/fancy_gym/envs/mujoco/beerpong/beerpong.py b/fancy_gym/envs/mujoco/beerpong/beerpong.py
index 9fcfdd8..368425d 100644
--- a/fancy_gym/envs/mujoco/beerpong/beerpong.py
+++ b/fancy_gym/envs/mujoco/beerpong/beerpong.py
@@ -6,6 +6,7 @@ from gym import utils
 from gym.envs.mujoco import MujocoEnv
 
 MAX_EPISODE_STEPS_BEERPONG = 300
+FIXED_RELEASE_STEP = 62  # empirically evaluated for frame_skip=2!
 
 # XML Variables
 ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
@@ -44,7 +45,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self._start_pos = np.array([0.0, 1.35, 0.0, 1.18, 0.0, -0.786, -1.59])
         self._start_vel = np.zeros(7)
 
-        self.release_step = 100  # time step of ball release
+        self.release_step = FIXED_RELEASE_STEP
         self.repeat_action = 2
 
         # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
@@ -250,86 +251,16 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         return False
 
 
-class BeerPongEnvFixedReleaseStep(BeerPongEnv):
-    def __init__(self):
-        super().__init__()
-        self.release_step = 62  # empirically evaluated for frame_skip=2!
-
-
 class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
-    def __init__(self):
-        super().__init__()
-        self.release_step = 62  # empirically evaluated for frame_skip=2!
 
     def step(self, a):
-        if self._steps < self.release_step:
+        if self._steps < FIXED_RELEASE_STEP:
             return super(BeerPongEnvStepBasedEpisodicReward, self).step(a)
         else:
             reward = 0
-            done = False
-            while not done:
-                sub_ob, sub_reward, done, sub_infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
+            done = True
+            while self._steps < MAX_EPISODE_STEPS_BEERPONG:
+                obs, sub_reward, done, infos = super(BeerPongEnvStepBasedEpisodicReward, self).step(
                     np.zeros(a.shape))
                 reward += sub_reward
-                infos = sub_infos
-                ob = sub_ob
-            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
-            # internal steps and thus, the observation also needs to be set correctly
-            return ob, reward, done, infos
-
-
-# class BeerBongEnvStepBased(BeerBongEnv):
-#     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
-#         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
-#         self.release_step = 62  # empirically evaluated for frame_skip=2!
-#
-#     def step(self, a):
-#         if self._steps < self.release_step:
-#             return super(BeerBongEnvStepBased, self).step(a)
-#         else:
-#             reward = 0
-#             done = False
-#             while not done:
-#                 sub_ob, sub_reward, done, sub_infos = super(BeerBongEnvStepBased, self).step(np.zeros(a.shape))
-#                 if not done or sub_infos['sim_crash']:
-#                     reward += sub_reward
-#                 else:
-#                     ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
-#                     cup_goal_dist_final = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
-#                         self.sim.model._site_name2id["cup_goal_final_table"]].copy())
-#                     cup_goal_dist_top = np.linalg.norm(ball_pos - self.sim.data.site_xpos[
-#                         self.sim.model._site_name2id["cup_goal_table"]].copy())
-#                     if sub_infos['success']:
-#                         dist_rew = -cup_goal_dist_final ** 2
-#                     else:
-#                         dist_rew = -0.5 * cup_goal_dist_final ** 2 - cup_goal_dist_top ** 2
-#                     reward = reward - sub_infos['action_cost'] + dist_rew
-#                 infos = sub_infos
-#                 ob = sub_ob
-#             ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
-#             # internal steps and thus, the observation also needs to be set correctly
-#             return ob, reward, done, infos
-
-
-if __name__ == "__main__":
-    env = BeerPongEnv()
-    env.seed(0)
-    # env = BeerBongEnvStepBased(frame_skip=2)
-    # env = BeerBongEnvStepBasedEpisodicReward(frame_skip=2)
-    # env = BeerBongEnvFixedReleaseStep(frame_skip=2)
-    import time
-
-    env.reset()
-    env.render("human")
-    for i in range(600):
-        # ac = 10 * env.action_space.sample()
-        ac = 0.05 * np.ones(7)
-        obs, rew, d, info = env.step(ac)
-        env.render("human")
-
-        if d:
-            print('reward:', rew)
-            print('RESETTING')
-            env.reset()
-            time.sleep(1)
-    env.close()
+            return obs, reward, done, infos
diff --git a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py
index b5e6687..8988f5a 100644
--- a/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py
+++ b/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py
@@ -31,9 +31,7 @@ class MPWrapper(RawInterfaceWrapper):
     def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
         if mp.learn_tau:
             self.release_step = action[0] / self.dt  # Tau value
-            return action, None
-        else:
-            return action, None
+        return action, None
 
     def set_context(self, context):
         xyz = np.zeros(3)
diff --git a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
index 329ea89..22e706a 100644
--- a/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
+++ b/fancy_gym/envs/mujoco/hopper_jump/hopper_jump.py
@@ -156,8 +156,6 @@ class HopperJumpEnv(HopperEnv):
         self.init_floor_contact = False
         self.contact_dist = None
 
-        self.data.geom()
-
         return observation
 
     def _is_floor_foot_contact(self):
diff --git a/test/test_custom.py b/test/test_fancy.py
similarity index 95%
rename from test/test_custom.py
rename to test/test_fancy.py
index 65633f8..d4890cc 100644
--- a/test/test_custom.py
+++ b/test/test_fancy.py
@@ -101,7 +101,7 @@ class TestCustomEnvironments(unittest.TestCase):
 
     def test_bb_functionality(self):
         """Tests that black box environments run without errors using random actions."""
-        for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+        for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
             with self.subTest(msg=traj_gen):
                 for id in env_ids:
                     with self.subTest(msg=id):
@@ -109,7 +109,7 @@ class TestCustomEnvironments(unittest.TestCase):
 
     def test_bb_determinism(self):
         """Tests that for black box environment identical seeds produce
         identical trajectories."""
-        for traj_gen, env_ids in fancy_gym.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
+        for traj_gen, env_ids in fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items():
             with self.subTest(msg=traj_gen):
                 self._run_env_determinism(env_ids)
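
A minimal sketch (not part of the patch) for exercising the renamed registry and the updated README example. It assumes the package is installed and, as in the code above, still exposes the old gym 4-tuple `step` API; names and calls are taken from the README and test snippets touched by this diff.

```python
import fancy_gym

# The registry dict is now exposed under the FANCY name; keys are "DMP" and "ProMP".
print("Custom MP tasks:")
print(fancy_gym.ALL_FANCY_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"])

# Step-based rollout of the environment the README now uses as its example.
env = fancy_gym.make('Reacher5d-v0', seed=1)
state = env.reset()
for _ in range(1000):
    # Random actions only; returns follow the old gym (obs, reward, done, info) convention.
    state, reward, done, info = env.step(env.action_space.sample())
    if done:
        state = env.reset()
```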