From 5d4fc4d52f6f8d3e22fcba4b94601ec5a4527f63 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 12 Jul 2022 14:16:20 +0200 Subject: [PATCH 1/5] ant jump seeding fix --- alr_envs/alr/mujoco/ant_jump/ant_jump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alr_envs/alr/mujoco/ant_jump/ant_jump.py b/alr_envs/alr/mujoco/ant_jump/ant_jump.py index deffcfa..eddfbe0 100644 --- a/alr_envs/alr/mujoco/ant_jump/ant_jump.py +++ b/alr_envs/alr/mujoco/ant_jump/ant_jump.py @@ -84,7 +84,7 @@ class AntJumpEnv(AntEnv): options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: self.current_step = 0 self.max_height = 0 - self.goal = np.random.uniform(1.0, 2.5, + self.goal = self.np_random.uniform(1.0, 2.5, 1) # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE return super().reset() From 993df10fad75aa7be60f6e1c7a10568cb5720ec7 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 12 Jul 2022 14:18:01 +0200 Subject: [PATCH 2/5] hopper throw seeding fixed --- .../alr/mujoco/hopper_throw/hopper_throw.py | 21 +++++++++-------- .../hopper_throw/hopper_throw_in_basket.py | 23 ++++++++++--------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py index 7ae33d1..c2503c4 100644 --- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py +++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py @@ -27,7 +27,7 @@ class ALRHopperThrowEnv(HopperEnv): healthy_z_range=(0.7, float('inf')), healthy_angle_range=(-float('inf'), float('inf')), reset_noise_scale=5e-3, - context = True, + context=True, exclude_current_positions_from_observation=True, max_episode_steps=250): xml_file = os.path.join(os.path.dirname(__file__), "assets", xml_file) @@ -40,10 +40,10 @@ class ALRHopperThrowEnv(HopperEnv): exclude_current_positions_from_observation) def step(self, action): - self.current_step += 1 self.do_simulation(action, self.frame_skip) - ball_pos_after = self.get_body_com("ball")[0] #abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward? + ball_pos_after = self.get_body_com("ball")[ + 0] # abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward? ball_pos_after_y = self.get_body_com("ball")[2] # done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? 
- Paul and Marc @@ -57,7 +57,7 @@ class ALRHopperThrowEnv(HopperEnv): if self.current_step >= self.max_episode_steps or done: distance_reward = -np.linalg.norm(ball_pos_after - self.goal) if self.context else \ - self._forward_reward_weight * ball_pos_after + self._forward_reward_weight * ball_pos_after healthy_reward = 0 if self.context else self.healthy_reward * self.current_step rewards = distance_reward + healthy_reward @@ -67,8 +67,8 @@ class ALRHopperThrowEnv(HopperEnv): info = { 'ball_pos': ball_pos_after, 'ball_pos_y': ball_pos_after_y, - '_steps' : self.current_step, - 'goal' : self.goal, + '_steps': self.current_step, + 'goal': self.goal, } return observation, reward, done, info @@ -78,7 +78,7 @@ class ALRHopperThrowEnv(HopperEnv): def reset(self): self.current_step = 0 - self.goal = self.goal = np.random.uniform(2.0, 6.0, 1) # 0.5 8.0 + self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0 return super().reset() # overwrite reset_model to make it deterministic @@ -86,14 +86,15 @@ class ALRHopperThrowEnv(HopperEnv): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale - qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) - qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) + qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) + qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) self.set_state(qpos, qvel) observation = self._get_obs() return observation + if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" env = ALRHopperThrowEnv() @@ -110,4 +111,4 @@ if __name__ == '__main__': print('After ', i, ' steps, done: ', d) env.reset() - env.close() \ No newline at end of file + env.close() diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py index 74a5b21..6827bf8 100644 --- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -3,7 +3,6 @@ from gym.envs.mujoco.hopper_v3 import HopperEnv import numpy as np - MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250 @@ -33,7 +32,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv): context=True, penalty=0.0, exclude_current_positions_from_observation=True, - max_episode_steps = 250): + max_episode_steps=250): self.hit_basket_reward = hit_basket_reward self.current_step = 0 self.max_episode_steps = max_episode_steps @@ -57,7 +56,8 @@ class ALRHopperThrowInBasketEnv(HopperEnv): basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2]) is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size - is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2) + is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size / 2) and ball_pos[1] <= basket_pos[1] + ( + self.basket_size / 2) is_in_basket_z = ball_pos[2] < 0.1 is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z if is_in_basket: self.ball_in_basket = True @@ -77,15 +77,16 @@ class ALRHopperThrowInBasketEnv(HopperEnv): if not self.context: rewards += self.hit_basket_reward else: - dist = np.linalg.norm(ball_pos-basket_center) + dist = np.linalg.norm(ball_pos - basket_center) if self.context: rewards = -10 * dist else: - rewards -= (dist*dist) + rewards -= (dist 
* dist) else: # penalty not needed - rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty? - + rewards += ((action[ + :2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 # too much of a penalty? + observation = self._get_obs() reward = rewards - costs info = { @@ -106,7 +107,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv): self.ball_in_basket = False if self.context: basket_id = self.sim.model.body_name2id("basket_ground") - self.basket_x = np.random.uniform(3, 7, 1) + self.basket_x = self.np_random.uniform(3, 7, 1) self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0] return super().reset() @@ -115,8 +116,8 @@ class ALRHopperThrowInBasketEnv(HopperEnv): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale - qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) - qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) + qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) + qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) self.set_state(qpos, qvel) @@ -140,4 +141,4 @@ if __name__ == '__main__': print('After ', i, ' steps, done: ', d) env.reset() - env.close() \ No newline at end of file + env.close() From 79c26681c925e752c1a57dc481baf87e08db19b3 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 12 Jul 2022 14:33:20 +0200 Subject: [PATCH 3/5] examples updated --- alr_envs/examples/examples_movement_primitives.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/alr_envs/examples/examples_movement_primitives.py b/alr_envs/examples/examples_movement_primitives.py index 85dd6a1..755d912 100644 --- a/alr_envs/examples/examples_movement_primitives.py +++ b/alr_envs/examples/examples_movement_primitives.py @@ -36,11 +36,11 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True env.render(mode=None) # Now the action space is not the raw action but the parametrization of the trajectory generator, - # such as a ProMP + # such as a ProMP. You can still use it the same, though. ac = env.action_space.sample() # This executes a full trajectory obs, reward, done, info = env.step(ac) - # Aggregated reward + # Aggregated reward of trajectory rewards += reward if done: @@ -62,9 +62,8 @@ def example_custom_mp(env_name="Reacher5dProMP-v0", seed=1, iterations=1, render """ # Changing the arguments of the black box env is possible by providing them to gym as with all kwargs. - # E.g. here for way to many basis functions - # env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000}) - env = alr_envs.make(env_name, seed) + # E.g. 
here for adding a lot of basis functions + env = alr_envs.make(env_name, seed, basis_generator_kwargs={'num_basis': 1000}) # mp_dict.update({'black_box_kwargs': {'learn_sub_trajectories': True}}) # mp_dict.update({'black_box_kwargs': {'do_replanning': lambda pos, vel, t: lambda t: t % 100}}) From 0339361656c08e19fba57bca68ac86f7b66a6bae Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 12 Jul 2022 15:17:02 +0200 Subject: [PATCH 4/5] renameing alr module and updating tests --- README.md | 4 +- alr_envs/__init__.py | 8 +- alr_envs/{alr => envs}/__init__.py | 94 +++++----- .../{alr => envs}/classic_control/README.MD | 0 .../{alr => envs}/classic_control/__init__.py | 0 .../classic_control/base_reacher/__init__.py | 0 .../base_reacher/base_reacher.py | 2 +- .../base_reacher/base_reacher_direct.py | 2 +- .../base_reacher/base_reacher_torque.py | 2 +- .../classic_control/hole_reacher/__init__.py | 0 .../hole_reacher/hole_reacher.py | 8 +- .../hole_reacher/hr_dist_vel_acc_reward.py | 0 .../hole_reacher/hr_simple_reward.py | 0 .../hole_reacher/hr_unbounded_reward.py | 0 .../hole_reacher/mp_wrapper.py | 0 .../simple_reacher/__init__.py | 0 .../simple_reacher/mp_wrapper.py | 0 .../simple_reacher/simple_reacher.py | 2 +- .../{alr => envs}/classic_control/utils.py | 0 .../viapoint_reacher/__init__.py | 0 .../viapoint_reacher/mp_wrapper.py | 0 .../viapoint_reacher/viapoint_reacher.py | 2 +- alr_envs/{alr => envs}/mujoco/README.MD | 0 alr_envs/{alr => envs}/mujoco/__init__.py | 0 .../{alr => envs}/mujoco/ant_jump/__init__.py | 0 .../{alr => envs}/mujoco/ant_jump/ant_jump.py | 6 +- .../mujoco/ant_jump/assets/ant.xml | 0 .../mujoco/ant_jump/mp_wrapper.py | 0 .../{alr => envs}/mujoco/beerpong/__init__.py | 0 .../mujoco/beerpong/assets/beerpong.xml | 0 .../beerpong/assets/beerpong_wo_cup.xml | 0 .../assets/beerpong_wo_cup_big_table.xml | 0 .../assets/meshes/wam/base_link_convex.stl | Bin .../assets/meshes/wam/base_link_fine.stl | Bin .../wam/bhand_finger_dist_link_convex.stl | Bin .../wam/bhand_finger_dist_link_fine.stl | Bin .../wam/bhand_finger_med_link_convex.stl | Bin .../meshes/wam/bhand_finger_med_link_fine.stl | Bin ...nger_prox_link_convex_decomposition_p1.stl | Bin ...nger_prox_link_convex_decomposition_p2.stl | Bin ...nger_prox_link_convex_decomposition_p3.stl | Bin .../wam/bhand_finger_prox_link_fine.stl | Bin .../assets/meshes/wam/bhand_palm_fine.stl | Bin ...hand_palm_link_convex_decomposition_p1.stl | Bin ...hand_palm_link_convex_decomposition_p2.stl | Bin ...hand_palm_link_convex_decomposition_p3.stl | Bin ...hand_palm_link_convex_decomposition_p4.stl | Bin .../mujoco/beerpong/assets/meshes/wam/cup.stl | Bin .../beerpong/assets/meshes/wam/cup_split1.stl | Bin .../assets/meshes/wam/cup_split10.stl | Bin .../assets/meshes/wam/cup_split11.stl | Bin .../assets/meshes/wam/cup_split12.stl | Bin .../assets/meshes/wam/cup_split13.stl | Bin .../assets/meshes/wam/cup_split14.stl | Bin .../assets/meshes/wam/cup_split15.stl | Bin .../assets/meshes/wam/cup_split16.stl | Bin .../assets/meshes/wam/cup_split17.stl | Bin .../assets/meshes/wam/cup_split18.stl | Bin .../beerpong/assets/meshes/wam/cup_split2.stl | Bin .../beerpong/assets/meshes/wam/cup_split3.stl | Bin .../beerpong/assets/meshes/wam/cup_split4.stl | Bin .../beerpong/assets/meshes/wam/cup_split5.stl | Bin .../beerpong/assets/meshes/wam/cup_split6.stl | Bin .../beerpong/assets/meshes/wam/cup_split7.stl | Bin .../beerpong/assets/meshes/wam/cup_split8.stl | Bin .../beerpong/assets/meshes/wam/cup_split9.stl | Bin 
.../assets/meshes/wam/elbow_link_convex.stl | Bin .../assets/meshes/wam/elbow_link_fine.stl | Bin .../forearm_link_convex_decomposition_p1.stl | Bin .../forearm_link_convex_decomposition_p2.stl | Bin .../assets/meshes/wam/forearm_link_fine.stl | Bin .../shoulder_link_convex_decomposition_p1.stl | Bin .../shoulder_link_convex_decomposition_p2.stl | Bin .../shoulder_link_convex_decomposition_p3.stl | Bin .../assets/meshes/wam/shoulder_link_fine.stl | Bin .../meshes/wam/shoulder_pitch_link_convex.stl | Bin .../meshes/wam/shoulder_pitch_link_fine.stl | Bin ...upper_arm_link_convex_decomposition_p1.stl | Bin ...upper_arm_link_convex_decomposition_p2.stl | Bin .../assets/meshes/wam/upper_arm_link_fine.stl | Bin .../meshes/wam/wrist_palm_link_convex.stl | Bin .../meshes/wam/wrist_palm_link_fine.stl | Bin ...ist_pitch_link_convex_decomposition_p1.stl | Bin ...ist_pitch_link_convex_decomposition_p2.stl | Bin ...ist_pitch_link_convex_decomposition_p3.stl | Bin .../meshes/wam/wrist_pitch_link_fine.stl | Bin ...wrist_yaw_link_convex_decomposition_p1.stl | Bin ...wrist_yaw_link_convex_decomposition_p2.stl | Bin .../assets/meshes/wam/wrist_yaw_link_fine.stl | Bin .../{alr => envs}/mujoco/beerpong/beerpong.py | 0 .../mujoco/beerpong/deprecated/__init__.py | 0 .../mujoco/beerpong/deprecated/beerpong.py | 2 +- .../deprecated/beerpong_reward_staged.py | 0 .../mujoco/beerpong/mp_wrapper.py | 0 .../mujoco/half_cheetah_jump/__init__.py | 0 .../half_cheetah_jump/assets/cheetah.xml | 0 .../half_cheetah_jump/half_cheetah_jump.py | 0 .../mujoco/half_cheetah_jump/mp_wrapper.py | 0 .../mujoco/hopper_jump/__init__.py | 0 .../mujoco/hopper_jump/assets/hopper_jump.xml | 0 .../hopper_jump/assets/hopper_jump_on_box.xml | 0 .../mujoco/hopper_jump/hopper_jump.py | 0 .../mujoco/hopper_jump/hopper_jump_on_box.py | 2 +- .../mujoco/hopper_jump/mp_wrapper.py | 0 .../mujoco/hopper_throw/__init__.py | 0 .../hopper_throw/assets/hopper_throw.xml | 0 .../assets/hopper_throw_in_basket.xml | 0 .../mujoco/hopper_throw/hopper_throw.py | 5 +- .../hopper_throw/hopper_throw_in_basket.py | 11 +- .../mujoco/hopper_throw/mp_wrapper.py | 0 .../{alr => envs}/mujoco/reacher/__init__.py | 0 .../mujoco/reacher/assets/reacher_5links.xml | 0 .../mujoco/reacher/assets/reacher_7links.xml | 0 .../mujoco/reacher/mp_wrapper.py | 0 .../{alr => envs}/mujoco/reacher/reacher.py | 0 .../mujoco/walker_2d_jump/__init__.py | 0 .../mujoco/walker_2d_jump/assets/walker2d.xml | 0 .../mujoco/walker_2d_jump/mp_wrapper.py | 0 .../mujoco/walker_2d_jump/walker_2d_jump.py | 18 +- alr_envs/examples/pd_control_gain_tuning.py | 2 +- alr_envs/meta/__init__.py | 10 +- setup.py | 4 +- test/test_bb_envs.py | 168 ------------------ test/test_custom.py | 118 ++++++++++++ test/{test_dmc_envs.py => test_dmc.py} | 85 ++++----- test/{test_metaworld_envs.py => test_gym.py} | 65 ++++--- test/test_metaworld.py | 119 +++++++++++++ 127 files changed, 418 insertions(+), 321 deletions(-) rename alr_envs/{alr => envs}/__init__.py (87%) rename alr_envs/{alr => envs}/classic_control/README.MD (100%) rename alr_envs/{alr => envs}/classic_control/__init__.py (100%) rename alr_envs/{alr => envs}/classic_control/base_reacher/__init__.py (100%) rename alr_envs/{alr => envs}/classic_control/base_reacher/base_reacher.py (98%) rename alr_envs/{alr => envs}/classic_control/base_reacher/base_reacher_direct.py (93%) rename alr_envs/{alr => envs}/classic_control/base_reacher/base_reacher_torque.py (92%) rename alr_envs/{alr => envs}/classic_control/hole_reacher/__init__.py (100%) rename alr_envs/{alr => 
envs}/classic_control/hole_reacher/hole_reacher.py (95%) rename alr_envs/{alr => envs}/classic_control/hole_reacher/hr_dist_vel_acc_reward.py (100%) rename alr_envs/{alr => envs}/classic_control/hole_reacher/hr_simple_reward.py (100%) rename alr_envs/{alr => envs}/classic_control/hole_reacher/hr_unbounded_reward.py (100%) rename alr_envs/{alr => envs}/classic_control/hole_reacher/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/classic_control/simple_reacher/__init__.py (100%) rename alr_envs/{alr => envs}/classic_control/simple_reacher/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/classic_control/simple_reacher/simple_reacher.py (97%) rename alr_envs/{alr => envs}/classic_control/utils.py (100%) rename alr_envs/{alr => envs}/classic_control/viapoint_reacher/__init__.py (100%) rename alr_envs/{alr => envs}/classic_control/viapoint_reacher/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/classic_control/viapoint_reacher/viapoint_reacher.py (98%) rename alr_envs/{alr => envs}/mujoco/README.MD (100%) rename alr_envs/{alr => envs}/mujoco/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/ant_jump/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/ant_jump/ant_jump.py (92%) rename alr_envs/{alr => envs}/mujoco/ant_jump/assets/ant.xml (100%) rename alr_envs/{alr => envs}/mujoco/ant_jump/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/beerpong.xml (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/beerpong_wo_cup.xml (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split10.stl (100%) rename alr_envs/{alr => 
envs}/mujoco/beerpong/assets/meshes/wam/cup_split11.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split12.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split13.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split14.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split15.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split16.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split17.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split18.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split3.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split4.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split5.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split6.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split7.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split8.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/cup_split9.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl (100%) rename alr_envs/{alr => 
envs}/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/beerpong.py (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/deprecated/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/deprecated/beerpong.py (98%) rename alr_envs/{alr => envs}/mujoco/beerpong/deprecated/beerpong_reward_staged.py (100%) rename alr_envs/{alr => envs}/mujoco/beerpong/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/half_cheetah_jump/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/half_cheetah_jump/assets/cheetah.xml (100%) rename alr_envs/{alr => envs}/mujoco/half_cheetah_jump/half_cheetah_jump.py (100%) rename alr_envs/{alr => envs}/mujoco/half_cheetah_jump/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/hopper_jump/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/hopper_jump/assets/hopper_jump.xml (100%) rename alr_envs/{alr => envs}/mujoco/hopper_jump/assets/hopper_jump_on_box.xml (100%) rename alr_envs/{alr => envs}/mujoco/hopper_jump/hopper_jump.py (100%) rename alr_envs/{alr => envs}/mujoco/hopper_jump/hopper_jump_on_box.py (98%) rename alr_envs/{alr => envs}/mujoco/hopper_jump/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/hopper_throw/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/hopper_throw/assets/hopper_throw.xml (100%) rename alr_envs/{alr => envs}/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml (100%) rename alr_envs/{alr => envs}/mujoco/hopper_throw/hopper_throw.py (95%) rename alr_envs/{alr => envs}/mujoco/hopper_throw/hopper_throw_in_basket.py (94%) rename alr_envs/{alr => envs}/mujoco/hopper_throw/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/reacher/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/reacher/assets/reacher_5links.xml (100%) rename alr_envs/{alr => envs}/mujoco/reacher/assets/reacher_7links.xml (100%) rename alr_envs/{alr => envs}/mujoco/reacher/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/reacher/reacher.py (100%) rename alr_envs/{alr => envs}/mujoco/walker_2d_jump/__init__.py (100%) rename alr_envs/{alr => envs}/mujoco/walker_2d_jump/assets/walker2d.xml (100%) rename alr_envs/{alr => envs}/mujoco/walker_2d_jump/mp_wrapper.py (100%) rename alr_envs/{alr => envs}/mujoco/walker_2d_jump/walker_2d_jump.py (86%) delete mode 100644 test/test_bb_envs.py create mode 100644 test/test_custom.py rename test/{test_dmc_envs.py => test_dmc.py} (63%) rename test/{test_metaworld_envs.py => test_gym.py} (68%) create mode 100644 test/test_metaworld.py diff --git a/README.md b/README.md index ac012c4..5d267a3 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ keys `DMP` and `ProMP` that store a list of available environment names. 
import alr_envs print("Custom MP tasks:") -print(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS) +print(alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS) print("OpenAI Gym MP tasks:") print(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS) @@ -116,7 +116,7 @@ print("Deepmind Control MP tasks:") print(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS) print("MetaWorld MP tasks:") -print(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS) +print(alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS) ``` ### How to create a new MP task diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py index e4a405d..d63a656 100644 --- a/alr_envs/__init__.py +++ b/alr_envs/__init__.py @@ -2,13 +2,13 @@ from alr_envs import dmc, meta, open_ai from alr_envs.utils.make_env_helpers import make, make_bb, make_rank # Convenience function for all MP environments -from .alr import ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS +from .envs import ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .dmc import ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS -from .meta import ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS +from .meta import ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS from .open_ai import ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS ALL_MOVEMENT_PRIMITIVE_ENVIRONMENTS = { key: value + ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS[key] + - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS[key] - for key, value in ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS.items()} + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS[key] + for key, value in ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items()} diff --git a/alr_envs/alr/__init__.py b/alr_envs/envs/__init__.py similarity index 87% rename from alr_envs/alr/__init__.py rename to alr_envs/envs/__init__.py index 3aea422..2f3b713 100644 --- a/alr_envs/alr/__init__.py +++ b/alr_envs/envs/__init__.py @@ -16,7 +16,7 @@ from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPER from .mujoco.reacher.reacher import ReacherEnv from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} +ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} DEFAULT_BB_DICT_ProMP = { "name": 'EnvName', @@ -63,7 +63,7 @@ DEFAULT_BB_DICT_DMP = { ## Simple Reacher register( id='SimpleReacher-v0', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + entry_point='alr_envs.envs.classic_control:SimpleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 2, @@ -72,7 +72,7 @@ register( register( id='LongSimpleReacher-v0', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + entry_point='alr_envs.envs.classic_control:SimpleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -83,7 +83,7 @@ register( register( id='ViaPointReacher-v0', - entry_point='alr_envs.alr.classic_control:ViaPointReacherEnv', + entry_point='alr_envs.envs.classic_control:ViaPointReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -95,7 +95,7 @@ register( ## Hole Reacher register( id='HoleReacher-v0', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + entry_point='alr_envs.envs.classic_control:HoleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -115,7 +115,7 @@ register( for _dims in [5, 7]: register( id=f'Reacher{_dims}d-v0', - entry_point='alr_envs.alr.mujoco:ReacherEnv', + entry_point='alr_envs.envs.mujoco:ReacherEnv', max_episode_steps=200, kwargs={ "n_links": _dims, @@ -124,7 +124,7 @@ for _dims in [5, 7]: register( 
id=f'Reacher{_dims}dSparse-v0', - entry_point='alr_envs.alr.mujoco:ReacherEnv', + entry_point='alr_envs.envs.mujoco:ReacherEnv', max_episode_steps=200, kwargs={ "sparse": True, @@ -134,7 +134,7 @@ for _dims in [5, 7]: register( id='HopperJumpSparse-v0', - entry_point='alr_envs.alr.mujoco:HopperJumpEnv', + entry_point='alr_envs.envs.mujoco:HopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "sparse": True, @@ -143,7 +143,7 @@ register( register( id='HopperJump-v0', - entry_point='alr_envs.alr.mujoco:HopperJumpEnv', + entry_point='alr_envs.envs.mujoco:HopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "sparse": False, @@ -155,43 +155,43 @@ register( register( id='ALRAntJump-v0', - entry_point='alr_envs.alr.mujoco:AntJumpEnv', + entry_point='alr_envs.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, ) register( id='ALRHalfCheetahJump-v0', - entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', + entry_point='alr_envs.envs.mujoco:ALRHalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, ) register( id='HopperJumpOnBox-v0', - entry_point='alr_envs.alr.mujoco:HopperJumpOnBoxEnv', + entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, ) register( id='ALRHopperThrow-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', + entry_point='alr_envs.envs.mujoco:ALRHopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, ) register( id='ALRHopperThrowInBasket-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', + entry_point='alr_envs.envs.mujoco:ALRHopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, ) register( id='ALRWalker2DJump-v0', - entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', + entry_point='alr_envs.envs.mujoco:ALRWalker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, ) register( id='BeerPong-v0', - entry_point='alr_envs.alr.mujoco:BeerPongEnv', + entry_point='alr_envs.envs.mujoco:BeerPongEnv', max_episode_steps=300, ) @@ -199,14 +199,14 @@ register( # only one time step, i.e. 
we simulate until the end of th episode register( id='BeerPongStepBased-v0', - entry_point='alr_envs.alr.mujoco:BeerPongEnvStepBasedEpisodicReward', + entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward', max_episode_steps=300, ) # Beerpong with episodic reward, but fixed release time step register( id='BeerPongFixedRelease-v0', - entry_point='alr_envs.alr.mujoco:BeerPongEnvFixedReleaseStep', + entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep', max_episode_steps=300, ) @@ -229,7 +229,7 @@ for _v in _versions: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_simple_reacher_dmp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' kwargs_dict_simple_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) @@ -242,7 +242,7 @@ for _v in _versions: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_simple_reacher_promp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # Viapoint reacher kwargs_dict_via_point_reacher_dmp = deepcopy(DEFAULT_BB_DICT_DMP) @@ -257,7 +257,7 @@ register( # max_episode_steps=1, kwargs=kwargs_dict_via_point_reacher_dmp ) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") +ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append("ViaPointReacherDMP-v0") kwargs_dict_via_point_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) kwargs_dict_via_point_reacher_promp['wrappers'].append(classic_control.viapoint_reacher.MPWrapper) @@ -268,7 +268,7 @@ register( entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_via_point_reacher_promp ) -ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") +ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ViaPointReacherProMP-v0") ## Hole Reacher _versions = ["HoleReacher-v0"] @@ -288,7 +288,7 @@ for _v in _versions: # max_episode_steps=1, kwargs=kwargs_dict_hole_reacher_dmp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' kwargs_dict_hole_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) @@ -301,7 +301,7 @@ for _v in _versions: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_hole_reacher_promp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ## ReacherNd _versions = ["Reacher5d-v0", "Reacher7d-v0", "Reacher5dSparse-v0", "Reacher7dSparse-v0"] @@ -320,7 +320,7 @@ for _v in _versions: # max_episode_steps=1, kwargs=kwargs_dict_reacherNd_dmp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"].append(_env_id) _env_id = f'{_name[0]}ProMP-{_name[1]}' kwargs_dict_alr_reacher_promp = deepcopy(DEFAULT_BB_DICT_ProMP) @@ -333,7 +333,7 @@ for _v in _versions: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_alr_reacher_promp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ######################################################################################################################## ## Beerpong ProMP @@ -354,7 +354,7 @@ for _v in _versions: 
entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_bp_promp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ### BP with Fixed release _versions = ["BeerPongStepBased-v0", "BeerPongFixedRelease-v0"] @@ -374,7 +374,7 @@ for _v in _versions: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_bp_promp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ######################################################################################################################## ## Table Tennis needs to be fixed according to Zhou's implementation @@ -395,7 +395,7 @@ for _v in _versions: # entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # kwargs=kwargs_dict_ant_jump_promp # ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # # ######################################################################################################################## # @@ -412,7 +412,7 @@ for _v in _versions: # entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # kwargs=kwargs_dict_halfcheetah_jump_promp # ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # # ######################################################################################################################## @@ -433,7 +433,7 @@ for _v in _versions: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_hopper_jump_promp ) - ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) # ######################################################################################################################## # @@ -451,13 +451,13 @@ for _v in _versions: # entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', # kwargs=kwargs_dict_walker2d_jump_promp # ) -# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) +# ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) ### Depricated, we will not provide non random starts anymore """ register( id='SimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + entry_point='alr_envs.envs.classic_control:SimpleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 2, @@ -467,7 +467,7 @@ register( register( id='LongSimpleReacher-v1', - entry_point='alr_envs.alr.classic_control:SimpleReacherEnv', + entry_point='alr_envs.envs.classic_control:SimpleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -476,7 +476,7 @@ register( ) register( id='HoleReacher-v1', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + entry_point='alr_envs.envs.classic_control:HoleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -491,7 +491,7 @@ register( ) register( id='HoleReacher-v2', - entry_point='alr_envs.alr.classic_control:HoleReacherEnv', + entry_point='alr_envs.envs.classic_control:HoleReacherEnv', max_episode_steps=200, kwargs={ "n_links": 5, @@ -508,7 +508,7 @@ register( # CtxtFree are v0, Contextual are v1 register( id='ALRAntJump-v0', - entry_point='alr_envs.alr.mujoco:AntJumpEnv', + entry_point='alr_envs.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, kwargs={ "max_episode_steps": 
MAX_EPISODE_STEPS_ANTJUMP, @@ -518,7 +518,7 @@ register( # CtxtFree are v0, Contextual are v1 register( id='ALRHalfCheetahJump-v0', - entry_point='alr_envs.alr.mujoco:ALRHalfCheetahJumpEnv', + entry_point='alr_envs.envs.mujoco:ALRHalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, @@ -527,7 +527,7 @@ register( ) register( id='ALRHopperJump-v0', - entry_point='alr_envs.alr.mujoco:HopperJumpEnv', + entry_point='alr_envs.envs.mujoco:HopperJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMP, @@ -545,7 +545,7 @@ for i in _vs: _env_id = f'ALRReacher{i}-v0' register( id=_env_id, - entry_point='alr_envs.alr.mujoco:ReacherEnv', + entry_point='alr_envs.envs.mujoco:ReacherEnv', max_episode_steps=200, kwargs={ "steps_before_reward": 0, @@ -558,7 +558,7 @@ for i in _vs: _env_id = f'ALRReacherSparse{i}-v0' register( id=_env_id, - entry_point='alr_envs.alr.mujoco:ReacherEnv', + entry_point='alr_envs.envs.mujoco:ReacherEnv', max_episode_steps=200, kwargs={ "steps_before_reward": 200, @@ -617,7 +617,7 @@ for i in _vs: register( id='ALRHopperJumpOnBox-v0', - entry_point='alr_envs.alr.mujoco:HopperJumpOnBoxEnv', + entry_point='alr_envs.envs.mujoco:HopperJumpOnBoxEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERJUMPONBOX, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERJUMPONBOX, @@ -626,7 +626,7 @@ for i in _vs: ) register( id='ALRHopperThrow-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowEnv', + entry_point='alr_envs.envs.mujoco:ALRHopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, @@ -635,7 +635,7 @@ for i in _vs: ) register( id='ALRHopperThrowInBasket-v0', - entry_point='alr_envs.alr.mujoco:ALRHopperThrowInBasketEnv', + entry_point='alr_envs.envs.mujoco:ALRHopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, @@ -644,7 +644,7 @@ for i in _vs: ) register( id='ALRWalker2DJump-v0', - entry_point='alr_envs.alr.mujoco:ALRWalker2dJumpEnv', + entry_point='alr_envs.envs.mujoco:ALRWalker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_WALKERJUMP, @@ -652,13 +652,13 @@ for i in _vs: } ) register(id='TableTennis2DCtxt-v1', - entry_point='alr_envs.alr.mujoco:TTEnvGym', + entry_point='alr_envs.envs.mujoco:TTEnvGym', max_episode_steps=MAX_EPISODE_STEPS, kwargs={'ctxt_dim': 2, 'fixed_goal': True}) register( id='ALRBeerPong-v0', - entry_point='alr_envs.alr.mujoco:ALRBeerBongEnv', + entry_point='alr_envs.envs.mujoco:ALRBeerBongEnv', max_episode_steps=300, kwargs={ "rndm_goal": False, diff --git a/alr_envs/alr/classic_control/README.MD b/alr_envs/envs/classic_control/README.MD similarity index 100% rename from alr_envs/alr/classic_control/README.MD rename to alr_envs/envs/classic_control/README.MD diff --git a/alr_envs/alr/classic_control/__init__.py b/alr_envs/envs/classic_control/__init__.py similarity index 100% rename from alr_envs/alr/classic_control/__init__.py rename to alr_envs/envs/classic_control/__init__.py diff --git a/alr_envs/alr/classic_control/base_reacher/__init__.py b/alr_envs/envs/classic_control/base_reacher/__init__.py similarity index 100% rename from alr_envs/alr/classic_control/base_reacher/__init__.py rename to alr_envs/envs/classic_control/base_reacher/__init__.py diff --git 
a/alr_envs/alr/classic_control/base_reacher/base_reacher.py b/alr_envs/envs/classic_control/base_reacher/base_reacher.py similarity index 98% rename from alr_envs/alr/classic_control/base_reacher/base_reacher.py rename to alr_envs/envs/classic_control/base_reacher/base_reacher.py index 1af8187..f9186d8 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher.py +++ b/alr_envs/envs/classic_control/base_reacher/base_reacher.py @@ -7,7 +7,7 @@ from gym import spaces from gym.core import ObsType from gym.utils import seeding -from alr_envs.alr.classic_control.utils import intersect +from alr_envs.envs.classic_control.utils import intersect class BaseReacherEnv(gym.Env, ABC): diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py b/alr_envs/envs/classic_control/base_reacher/base_reacher_direct.py similarity index 93% rename from alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py rename to alr_envs/envs/classic_control/base_reacher/base_reacher_direct.py index dc79827..05cff5b 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_direct.py +++ b/alr_envs/envs/classic_control/base_reacher/base_reacher_direct.py @@ -2,7 +2,7 @@ from abc import ABC from gym import spaces import numpy as np -from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv +from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv class BaseReacherDirectEnv(BaseReacherEnv, ABC): diff --git a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py b/alr_envs/envs/classic_control/base_reacher/base_reacher_torque.py similarity index 92% rename from alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py rename to alr_envs/envs/classic_control/base_reacher/base_reacher_torque.py index 094f632..469d8a3 100644 --- a/alr_envs/alr/classic_control/base_reacher/base_reacher_torque.py +++ b/alr_envs/envs/classic_control/base_reacher/base_reacher_torque.py @@ -2,7 +2,7 @@ from abc import ABC from gym import spaces import numpy as np -from alr_envs.alr.classic_control.base_reacher.base_reacher import BaseReacherEnv +from alr_envs.envs.classic_control.base_reacher.base_reacher import BaseReacherEnv class BaseReacherTorqueEnv(BaseReacherEnv, ABC): diff --git a/alr_envs/alr/classic_control/hole_reacher/__init__.py b/alr_envs/envs/classic_control/hole_reacher/__init__.py similarity index 100% rename from alr_envs/alr/classic_control/hole_reacher/__init__.py rename to alr_envs/envs/classic_control/hole_reacher/__init__.py diff --git a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py b/alr_envs/envs/classic_control/hole_reacher/hole_reacher.py similarity index 95% rename from alr_envs/alr/classic_control/hole_reacher/hole_reacher.py rename to alr_envs/envs/classic_control/hole_reacher/hole_reacher.py index 8f0122f..0bd0e5c 100644 --- a/alr_envs/alr/classic_control/hole_reacher/hole_reacher.py +++ b/alr_envs/envs/classic_control/hole_reacher/hole_reacher.py @@ -6,7 +6,7 @@ import numpy as np from gym.core import ObsType from matplotlib import patches -from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv +from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv class HoleReacherEnv(BaseReacherDirectEnv): @@ -41,13 +41,13 @@ class HoleReacherEnv(BaseReacherDirectEnv): self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) if rew_fct == "simple": - from 
alr_envs.alr.classic_control.hole_reacher.hr_simple_reward import HolereacherReward + from alr_envs.envs.classic_control.hole_reacher.hr_simple_reward import HolereacherReward self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) elif rew_fct == "vel_acc": - from alr_envs.alr.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward + from alr_envs.envs.classic_control.hole_reacher.hr_dist_vel_acc_reward import HolereacherReward self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision, collision_penalty) elif rew_fct == "unbounded": - from alr_envs.alr.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward + from alr_envs.envs.classic_control.hole_reacher.hr_unbounded_reward import HolereacherReward self.reward_function = HolereacherReward(allow_self_collision, allow_wall_collision) else: raise ValueError("Unknown reward function {}".format(rew_fct)) diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py b/alr_envs/envs/classic_control/hole_reacher/hr_dist_vel_acc_reward.py similarity index 100% rename from alr_envs/alr/classic_control/hole_reacher/hr_dist_vel_acc_reward.py rename to alr_envs/envs/classic_control/hole_reacher/hr_dist_vel_acc_reward.py diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py b/alr_envs/envs/classic_control/hole_reacher/hr_simple_reward.py similarity index 100% rename from alr_envs/alr/classic_control/hole_reacher/hr_simple_reward.py rename to alr_envs/envs/classic_control/hole_reacher/hr_simple_reward.py diff --git a/alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py b/alr_envs/envs/classic_control/hole_reacher/hr_unbounded_reward.py similarity index 100% rename from alr_envs/alr/classic_control/hole_reacher/hr_unbounded_reward.py rename to alr_envs/envs/classic_control/hole_reacher/hr_unbounded_reward.py diff --git a/alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py b/alr_envs/envs/classic_control/hole_reacher/mp_wrapper.py similarity index 100% rename from alr_envs/alr/classic_control/hole_reacher/mp_wrapper.py rename to alr_envs/envs/classic_control/hole_reacher/mp_wrapper.py diff --git a/alr_envs/alr/classic_control/simple_reacher/__init__.py b/alr_envs/envs/classic_control/simple_reacher/__init__.py similarity index 100% rename from alr_envs/alr/classic_control/simple_reacher/__init__.py rename to alr_envs/envs/classic_control/simple_reacher/__init__.py diff --git a/alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py b/alr_envs/envs/classic_control/simple_reacher/mp_wrapper.py similarity index 100% rename from alr_envs/alr/classic_control/simple_reacher/mp_wrapper.py rename to alr_envs/envs/classic_control/simple_reacher/mp_wrapper.py diff --git a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py b/alr_envs/envs/classic_control/simple_reacher/simple_reacher.py similarity index 97% rename from alr_envs/alr/classic_control/simple_reacher/simple_reacher.py rename to alr_envs/envs/classic_control/simple_reacher/simple_reacher.py index eb079d0..8c6f8d5 100644 --- a/alr_envs/alr/classic_control/simple_reacher/simple_reacher.py +++ b/alr_envs/envs/classic_control/simple_reacher/simple_reacher.py @@ -5,7 +5,7 @@ import numpy as np from gym import spaces from gym.core import ObsType -from alr_envs.alr.classic_control.base_reacher.base_reacher_torque import BaseReacherTorqueEnv +from alr_envs.envs.classic_control.base_reacher.base_reacher_torque import 
BaseReacherTorqueEnv class SimpleReacherEnv(BaseReacherTorqueEnv): diff --git a/alr_envs/alr/classic_control/utils.py b/alr_envs/envs/classic_control/utils.py similarity index 100% rename from alr_envs/alr/classic_control/utils.py rename to alr_envs/envs/classic_control/utils.py diff --git a/alr_envs/alr/classic_control/viapoint_reacher/__init__.py b/alr_envs/envs/classic_control/viapoint_reacher/__init__.py similarity index 100% rename from alr_envs/alr/classic_control/viapoint_reacher/__init__.py rename to alr_envs/envs/classic_control/viapoint_reacher/__init__.py diff --git a/alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py b/alr_envs/envs/classic_control/viapoint_reacher/mp_wrapper.py similarity index 100% rename from alr_envs/alr/classic_control/viapoint_reacher/mp_wrapper.py rename to alr_envs/envs/classic_control/viapoint_reacher/mp_wrapper.py diff --git a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py b/alr_envs/envs/classic_control/viapoint_reacher/viapoint_reacher.py similarity index 98% rename from alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py rename to alr_envs/envs/classic_control/viapoint_reacher/viapoint_reacher.py index 569ca2c..9266721 100644 --- a/alr_envs/alr/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/alr_envs/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -6,7 +6,7 @@ import numpy as np from gym.core import ObsType from gym.utils import seeding -from alr_envs.alr.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv +from alr_envs.envs.classic_control.base_reacher.base_reacher_direct import BaseReacherDirectEnv class ViaPointReacherEnv(BaseReacherDirectEnv): diff --git a/alr_envs/alr/mujoco/README.MD b/alr_envs/envs/mujoco/README.MD similarity index 100% rename from alr_envs/alr/mujoco/README.MD rename to alr_envs/envs/mujoco/README.MD diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/envs/mujoco/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/__init__.py rename to alr_envs/envs/mujoco/__init__.py diff --git a/alr_envs/alr/mujoco/ant_jump/__init__.py b/alr_envs/envs/mujoco/ant_jump/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/ant_jump/__init__.py rename to alr_envs/envs/mujoco/ant_jump/__init__.py diff --git a/alr_envs/alr/mujoco/ant_jump/ant_jump.py b/alr_envs/envs/mujoco/ant_jump/ant_jump.py similarity index 92% rename from alr_envs/alr/mujoco/ant_jump/ant_jump.py rename to alr_envs/envs/mujoco/ant_jump/ant_jump.py index eddfbe0..74a66a3 100644 --- a/alr_envs/alr/mujoco/ant_jump/ant_jump.py +++ b/alr_envs/envs/mujoco/ant_jump/ant_jump.py @@ -55,7 +55,7 @@ class AntJumpEnv(AntEnv): costs = ctrl_cost + contact_cost - done = height < 0.3 # fall over -> is the 0.3 value from healthy_z_range? TODO change 0.3 to the value of healthy z angle + done = bool(height < 0.3) # fall over -> is the 0.3 value from healthy_z_range? 
TODO change 0.3 to the value of healthy z angle if self.current_step == MAX_EPISODE_STEPS_ANTJUMP or done: # -10 for scaling the value of the distance between the max_height and the goal height; only used when context is enabled @@ -84,8 +84,8 @@ class AntJumpEnv(AntEnv): options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: self.current_step = 0 self.max_height = 0 - self.goal = self.np_random.uniform(1.0, 2.5, - 1) # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE + # goal heights from 1.0 to 2.5; can be increased, but didnt work well with CMORE + self.goal = self.np_random.uniform(1.0, 2.5, 1) return super().reset() # reset_model had to be implemented in every env to make it deterministic diff --git a/alr_envs/alr/mujoco/ant_jump/assets/ant.xml b/alr_envs/envs/mujoco/ant_jump/assets/ant.xml similarity index 100% rename from alr_envs/alr/mujoco/ant_jump/assets/ant.xml rename to alr_envs/envs/mujoco/ant_jump/assets/ant.xml diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/envs/mujoco/ant_jump/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/ant_jump/mp_wrapper.py rename to alr_envs/envs/mujoco/ant_jump/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/beerpong/__init__.py b/alr_envs/envs/mujoco/beerpong/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/__init__.py rename to alr_envs/envs/mujoco/beerpong/__init__.py diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong.xml similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/beerpong.xml rename to alr_envs/envs/mujoco/beerpong/assets/beerpong.xml diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup.xml similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup.xml rename to alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup.xml diff --git a/alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml b/alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml rename to alr_envs/envs/mujoco/beerpong/assets/beerpong_wo_cup_big_table.xml diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/base_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl similarity index 100% 
rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_dist_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_med_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_finger_prox_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl 
b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/bhand_palm_link_convex_decomposition_p4.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split10.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split10.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split10.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split10.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split11.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split11.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split11.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split11.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split12.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split12.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split12.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split12.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split13.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split13.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split13.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split13.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split14.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split14.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split14.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split14.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split15.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split15.stl similarity index 100% rename from 
alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split15.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split15.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split16.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split16.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split16.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split16.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split17.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split17.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split17.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split17.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split18.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split18.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split18.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split18.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split4.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split4.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split4.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split4.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split5.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split5.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split5.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split5.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split6.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split6.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split6.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split6.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split7.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split7.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split7.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split7.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split8.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split8.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split8.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split8.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split9.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split9.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/cup_split9.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/cup_split9.stl diff 
--git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/elbow_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/forearm_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl 
b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/shoulder_pitch_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/upper_arm_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_convex.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_palm_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl 
b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_convex_decomposition_p3.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_pitch_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p1.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_convex_decomposition_p2.stl diff --git a/alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl b/alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl similarity index 100% rename from alr_envs/alr/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl rename to alr_envs/envs/mujoco/beerpong/assets/meshes/wam/wrist_yaw_link_fine.stl diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/envs/mujoco/beerpong/beerpong.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/beerpong.py rename to alr_envs/envs/mujoco/beerpong/beerpong.py diff --git a/alr_envs/alr/mujoco/beerpong/deprecated/__init__.py b/alr_envs/envs/mujoco/beerpong/deprecated/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/deprecated/__init__.py rename to alr_envs/envs/mujoco/beerpong/deprecated/__init__.py diff --git a/alr_envs/alr/mujoco/beerpong/deprecated/beerpong.py b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py similarity index 98% rename from alr_envs/alr/mujoco/beerpong/deprecated/beerpong.py rename to alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py index 0fe7a42..cc9a9de 100644 --- a/alr_envs/alr/mujoco/beerpong/deprecated/beerpong.py +++ b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py @@ -5,7 +5,7 @@ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv -from alr_envs.alr.mujoco.beerpong.deprecated.beerpong_reward_staged import BeerPongReward +from alr_envs.envs.mujoco.beerpong.deprecated.beerpong_reward_staged import BeerPongReward class BeerPongEnv(MujocoEnv, utils.EzPickle): diff --git a/alr_envs/alr/mujoco/beerpong/deprecated/beerpong_reward_staged.py b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong_reward_staged.py similarity index 100% rename from alr_envs/alr/mujoco/beerpong/deprecated/beerpong_reward_staged.py rename to alr_envs/envs/mujoco/beerpong/deprecated/beerpong_reward_staged.py diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py similarity index 100% rename 
from alr_envs/alr/mujoco/beerpong/mp_wrapper.py rename to alr_envs/envs/mujoco/beerpong/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/__init__.py b/alr_envs/envs/mujoco/half_cheetah_jump/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/__init__.py rename to alr_envs/envs/mujoco/half_cheetah_jump/__init__.py diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml b/alr_envs/envs/mujoco/half_cheetah_jump/assets/cheetah.xml similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/assets/cheetah.xml rename to alr_envs/envs/mujoco/half_cheetah_jump/assets/cheetah.xml diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/half_cheetah_jump.py rename to alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py diff --git a/alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py b/alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/half_cheetah_jump/mp_wrapper.py rename to alr_envs/envs/mujoco/half_cheetah_jump/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/hopper_jump/__init__.py b/alr_envs/envs/mujoco/hopper_jump/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/__init__.py rename to alr_envs/envs/mujoco/hopper_jump/__init__.py diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml b/alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump.xml rename to alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump.xml diff --git a/alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml b/alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/assets/hopper_jump_on_box.xml rename to alr_envs/envs/mujoco/hopper_jump/assets/hopper_jump_on_box.xml diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/hopper_jump.py rename to alr_envs/envs/mujoco/hopper_jump/hopper_jump.py diff --git a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py similarity index 98% rename from alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py rename to alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py index ac7e16b..845edaa 100644 --- a/alr_envs/alr/mujoco/hopper_jump/hopper_jump_on_box.py +++ b/alr_envs/envs/mujoco/hopper_jump/hopper_jump_on_box.py @@ -134,7 +134,7 @@ class HopperJumpOnBoxEnv(HopperEnv): self.hopper_on_box = False if self.context: box_id = self.sim.model.body_name2id("box") - self.box_x = np.random.uniform(1, 3, 1) + self.box_x = self.np_random.uniform(1, 3, 1) self.sim.model.body_pos[box_id] = [self.box_x, 0, 0] return super().reset() diff --git a/alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_jump/mp_wrapper.py rename to alr_envs/envs/mujoco/hopper_jump/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/hopper_throw/__init__.py b/alr_envs/envs/mujoco/hopper_throw/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/__init__.py rename to alr_envs/envs/mujoco/hopper_throw/__init__.py diff --git 
a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml b/alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw.xml rename to alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw.xml diff --git a/alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml b/alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml rename to alr_envs/envs/mujoco/hopper_throw/assets/hopper_throw_in_basket.xml diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py similarity index 95% rename from alr_envs/alr/mujoco/hopper_throw/hopper_throw.py rename to alr_envs/envs/mujoco/hopper_throw/hopper_throw.py index c2503c4..5630958 100644 --- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py +++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py @@ -1,4 +1,5 @@ import os +from typing import Optional from gym.envs.mujoco.hopper_v3 import HopperEnv import numpy as np @@ -47,7 +48,7 @@ class ALRHopperThrowEnv(HopperEnv): ball_pos_after_y = self.get_body_com("ball")[2] # done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc - ball_landed = self.get_body_com("ball")[2] <= 0.05 + ball_landed = bool(self.get_body_com("ball")[2] <= 0.05) done = ball_landed ctrl_cost = self.control_cost(action) @@ -76,7 +77,7 @@ class ALRHopperThrowEnv(HopperEnv): def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self): + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): self.current_step = 0 self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0 return super().reset() diff --git a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py similarity index 94% rename from alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py rename to alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 6827bf8..7ea9675 100644 --- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -1,4 +1,6 @@ import os +from typing import Optional + from gym.envs.mujoco.hopper_v3 import HopperEnv import numpy as np @@ -57,13 +59,14 @@ class ALRHopperThrowInBasketEnv(HopperEnv): is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size / 2) and ball_pos[1] <= basket_pos[1] + ( - self.basket_size / 2) + self.basket_size / 2) is_in_basket_z = ball_pos[2] < 0.1 is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z - if is_in_basket: self.ball_in_basket = True + if is_in_basket: + self.ball_in_basket = True ball_landed = self.get_body_com("ball")[2] <= 0.05 - done = ball_landed or is_in_basket + done = bool(ball_landed or is_in_basket) rewards = 0 @@ -98,7 +101,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv): def _get_obs(self): return np.append(super()._get_obs(), self.basket_x) - def reset(self): + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): if self.max_episode_steps == 10: # We have to initialize this here, because the spec is only added after creating 
the env. self.max_episode_steps = self.spec.max_episode_steps diff --git a/alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py b/alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/hopper_throw/mp_wrapper.py rename to alr_envs/envs/mujoco/hopper_throw/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/reacher/__init__.py b/alr_envs/envs/mujoco/reacher/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/reacher/__init__.py rename to alr_envs/envs/mujoco/reacher/__init__.py diff --git a/alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml b/alr_envs/envs/mujoco/reacher/assets/reacher_5links.xml similarity index 100% rename from alr_envs/alr/mujoco/reacher/assets/reacher_5links.xml rename to alr_envs/envs/mujoco/reacher/assets/reacher_5links.xml diff --git a/alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml b/alr_envs/envs/mujoco/reacher/assets/reacher_7links.xml similarity index 100% rename from alr_envs/alr/mujoco/reacher/assets/reacher_7links.xml rename to alr_envs/envs/mujoco/reacher/assets/reacher_7links.xml diff --git a/alr_envs/alr/mujoco/reacher/mp_wrapper.py b/alr_envs/envs/mujoco/reacher/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/reacher/mp_wrapper.py rename to alr_envs/envs/mujoco/reacher/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/reacher/reacher.py b/alr_envs/envs/mujoco/reacher/reacher.py similarity index 100% rename from alr_envs/alr/mujoco/reacher/reacher.py rename to alr_envs/envs/mujoco/reacher/reacher.py diff --git a/alr_envs/alr/mujoco/walker_2d_jump/__init__.py b/alr_envs/envs/mujoco/walker_2d_jump/__init__.py similarity index 100% rename from alr_envs/alr/mujoco/walker_2d_jump/__init__.py rename to alr_envs/envs/mujoco/walker_2d_jump/__init__.py diff --git a/alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml b/alr_envs/envs/mujoco/walker_2d_jump/assets/walker2d.xml similarity index 100% rename from alr_envs/alr/mujoco/walker_2d_jump/assets/walker2d.xml rename to alr_envs/envs/mujoco/walker_2d_jump/assets/walker2d.xml diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py similarity index 100% rename from alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py rename to alr_envs/envs/mujoco/walker_2d_jump/mp_wrapper.py diff --git a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py similarity index 86% rename from alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py rename to alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py index 1ab0d29..5b143bc 100644 --- a/alr_envs/alr/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -1,9 +1,12 @@ import os +from typing import Optional + from gym.envs.mujoco.walker2d_v3 import Walker2dEnv import numpy as np MAX_EPISODE_STEPS_WALKERJUMP = 300 + # TODO: Right now this environment only considers jumping to a specific height, which is not nice. 
It should be extended # to the same structure as the Hopper, where the angles are randomized (->contexts) and the agent should jump as height # as possible, while landing at a specific target position @@ -36,16 +39,16 @@ class ALRWalker2dJumpEnv(Walker2dEnv): super().__init__(xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_z_range, healthy_angle_range, reset_noise_scale, exclude_current_positions_from_observation) - + def step(self, action): self.current_step += 1 self.do_simulation(action, self.frame_skip) - #pos_after = self.get_body_com("torso")[0] + # pos_after = self.get_body_com("torso")[0] height = self.get_body_com("torso")[2] self.max_height = max(height, self.max_height) - done = height < 0.2 + done = bool(height < 0.2) ctrl_cost = self.control_cost(action) costs = ctrl_cost @@ -70,10 +73,10 @@ class ALRWalker2dJumpEnv(Walker2dEnv): def _get_obs(self): return np.append(super()._get_obs(), self.goal) - def reset(self): + def reset(self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None): self.current_step = 0 self.max_height = 0 - self.goal = np.random.uniform(1.5, 2.5, 1) # 1.5 3.0 + self.goal = self.np_random.uniform(1.5, 2.5, 1) # 1.5 3.0 return super().reset() # overwrite reset_model to make it deterministic @@ -81,14 +84,15 @@ class ALRWalker2dJumpEnv(Walker2dEnv): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale - qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) - qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) + qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq) + qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv) self.set_state(qpos, qvel) observation = self._get_obs() return observation + if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" env = ALRWalker2dJumpEnv() diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py index 27cf8f8..79161d4 100644 --- a/alr_envs/examples/pd_control_gain_tuning.py +++ b/alr_envs/examples/pd_control_gain_tuning.py @@ -2,7 +2,7 @@ import numpy as np from matplotlib import pyplot as plt from alr_envs import dmc, meta -from alr_envs.alr import mujoco +from alr_envs.envs import mujoco from alr_envs.utils.make_env_helpers import make_promp_env diff --git a/alr_envs/meta/__init__.py b/alr_envs/meta/__init__.py index 6ccd622..fcc87cc 100644 --- a/alr_envs/meta/__init__.py +++ b/alr_envs/meta/__init__.py @@ -5,7 +5,7 @@ from gym import register from . 
import goal_object_change_mp_wrapper, goal_change_mp_wrapper, goal_endeffector_change_mp_wrapper, \ object_change_mp_wrapper -ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} +ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS = {"DMP": [], "ProMP": []} # MetaWorld @@ -43,7 +43,7 @@ for _task in _goal_change_envs: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_goal_change_promp ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _object_change_envs = ["bin-picking-v2", "hammer-v2", "sweep-into-v2"] for _task in _object_change_envs: @@ -58,7 +58,7 @@ for _task in _object_change_envs: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_object_change_promp ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _goal_and_object_change_envs = ["box-close-v2", "button-press-v2", "button-press-wall-v2", "button-press-topdown-v2", "button-press-topdown-wall-v2", "coffee-button-v2", "coffee-pull-v2", @@ -84,7 +84,7 @@ for _task in _goal_and_object_change_envs: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_goal_and_object_change_promp ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) _goal_and_endeffector_change_envs = ["basketball-v2"] for _task in _goal_and_endeffector_change_envs: @@ -100,4 +100,4 @@ for _task in _goal_and_endeffector_change_envs: entry_point='alr_envs.utils.make_env_helpers:make_bb_env_helper', kwargs=kwargs_dict_goal_and_endeffector_change_promp ) - ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) + ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id) diff --git a/setup.py b/setup.py index 3b78401..02dc453 100644 --- a/setup.py +++ b/setup.py @@ -31,10 +31,10 @@ setup( "mujoco_py<2.2,>=2.1", ], packages=[package for package in find_packages() if package.startswith("alr_envs")], - # packages=['alr_envs', 'alr_envs.alr', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'], + # packages=['alr_envs', 'alr_envs.envs', 'alr_envs.open_ai', 'alr_envs.dmc', 'alr_envs.meta', 'alr_envs.utils'], package_data={ "alr_envs": [ - "alr/mujoco/*/assets/*.xml", + "envs/mujoco/*/assets/*.xml", ] }, python_requires=">=3.6", diff --git a/test/test_bb_envs.py b/test/test_bb_envs.py deleted file mode 100644 index 49eb31e..0000000 --- a/test/test_bb_envs.py +++ /dev/null @@ -1,168 +0,0 @@ -import unittest - -import gym -import numpy as np - -import alr_envs # noqa -from alr_envs.utils.make_env_helpers import make - -ALL_SPECS = list(spec for spec in gym.envs.registry.all() if "alr_envs" in spec.entry_point) -SEED = 1 - - -class TestMPEnvironments(unittest.TestCase): - - def _run_env(self, env_id, iterations=None, seed=SEED, render=False): - """ - Example for running a DMC based env in the step based setting. 
- The env_id has to be specified as `domain_name-task_name` or - for manipulation tasks as `manipulation-environment_name` - - Args: - env_id: Either `domain_name-task_name` or `manipulation-environment_name` - iterations: Number of rollout steps to run - seed= random seeding - render: Render the episode - - Returns: - - """ - env: gym.Env = make(env_id, seed=seed) - rewards = [] - observations = [] - dones = [] - obs = env.reset() - self._verify_observations(obs, env.observation_space, "reset()") - - iterations = iterations or (env.spec.max_episode_steps or 1) - - # number of samples(multiple environment steps) - for i in range(iterations): - observations.append(obs) - - actions = env.action_space.sample() - # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) - obs, reward, done, info = env.step(actions) - - self._verify_observations(obs, env.observation_space, "step()") - self._verify_reward(reward) - self._verify_done(done) - - rewards.append(reward) - dones.append(done) - - if render: - env.render("human") - - if done: - break - - assert done, "Done flag is not True after end of episode." - observations.append(obs) - env.close() - del env - return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) - - def _run_env_determinism(self, ids): - seed = 0 - for env_id in ids: - with self.subTest(msg=env_id): - traj1 = self._run_env(env_id, seed=seed) - traj2 = self._run_env(env_id, seed=seed) - for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step - self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") - self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") - self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") - self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") - - def _verify_observations(self, obs, observation_space, obs_type="reset()"): - self.assertTrue(observation_space.contains(obs), - f"Observation {obs} received from {obs_type} " - f"not contained in observation space {observation_space}.") - - def _verify_reward(self, reward): - self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.") - - def _verify_done(self, done): - self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") - - def test_alr_environment_functionality(self): - """Tests that environments runs without errors using random actions for ALR MP envs.""" - with self.subTest(msg="DMP"): - for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - with self.subTest(msg="ProMP"): - for env_id in alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - def test_openai_environment_functionality(self): - """Tests that environments runs without errors using random actions for OpenAI gym MP envs.""" - with self.subTest(msg="DMP"): - for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - with self.subTest(msg="ProMP"): - for env_id in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - def test_dmc_environment_functionality(self): - """Tests that environments runs without errors using random actions for DMC MP envs.""" - 
with self.subTest(msg="DMP"): - for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['DMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - with self.subTest(msg="ProMP"): - for env_id in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS['ProMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - def test_metaworld_environment_functionality(self): - """Tests that environments runs without errors using random actions for Metaworld MP envs.""" - with self.subTest(msg="DMP"): - for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['DMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - with self.subTest(msg="ProMP"): - for env_id in alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS['ProMP']: - with self.subTest(msg=env_id): - self._run_env(env_id) - - def test_alr_environment_determinism(self): - """Tests that identical seeds produce identical trajectories for ALR MP Envs.""" - with self.subTest(msg="DMP"): - self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="ProMP"): - self._run_env_determinism(alr_envs.ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) - - def test_openai_environment_determinism(self): - """Tests that identical seeds produce identical trajectories for OpenAI gym MP Envs.""" - with self.subTest(msg="DMP"): - self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="ProMP"): - self._run_env_determinism(alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) - - def test_dmc_environment_determinism(self): - """Tests that identical seeds produce identical trajectories for DMC MP Envs.""" - with self.subTest(msg="DMP"): - self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="ProMP"): - self._run_env_determinism(alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS["ProMP"]) - - def test_metaworld_environment_determinism(self): - """Tests that identical seeds produce identical trajectories for Metaworld MP Envs.""" - with self.subTest(msg="DMP"): - self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["DMP"]) - with self.subTest(msg="ProMP"): - self._run_env_determinism(alr_envs.ALL_METAWORLD_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"]) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/test_custom.py b/test/test_custom.py new file mode 100644 index 0000000..37bc48d --- /dev/null +++ b/test/test_custom.py @@ -0,0 +1,118 @@ +import unittest + +import gym +import numpy as np + +import alr_envs # noqa +from alr_envs.utils.make_env_helpers import make + +CUSTOM_IDS = [spec.id for spec in gym.envs.registry.all() if + "alr_envs" in spec.entry_point and not 'make_bb_env_helper' in spec.entry_point] +SEED = 1 + + +class TestCustomEnvironments(unittest.TestCase): + + def _run_env(self, env_id, iterations=None, seed=SEED, render=False): + """ + Example for running a DMC based env in the step based setting. 
+ The env_id has to be specified as `domain_name-task_name` or + for manipulation tasks as `manipulation-environment_name` + + Args: + env_id: Either `domain_name-task_name` or `manipulation-environment_name` + iterations: Number of rollout steps to run + seed: random seeding + render: Render the episode + + Returns: observations, rewards, dones, actions + + """ + env: gym.Env = make(env_id, seed=seed) + rewards = [] + actions = [] + observations = [] + dones = [] + obs = env.reset() + self._verify_observations(obs, env.observation_space, "reset()") + + iterations = iterations or (env.spec.max_episode_steps or 1) + + # number of samples(multiple environment steps) + for i in range(iterations): + observations.append(obs) + + ac = env.action_space.sample() + actions.append(ac) + obs, reward, done, info = env.step(ac) + + self._verify_observations(obs, env.observation_space, "step()") + self._verify_reward(reward) + self._verify_done(done) + + rewards.append(reward) + dones.append(done) + + if render: + env.render("human") + + if done: + break + + assert done, "Done flag is not True after end of episode." + observations.append(obs) + env.close() + del env + return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) + + def _run_env_determinism(self, ids): + seed = 0 + for env_id in ids: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + + def _verify_observations(self, obs, observation_space, obs_type="reset()"): + self.assertTrue(observation_space.contains(obs), + f"Observation {obs} received from {obs_type} " + f"not contained in observation space {observation_space}.") + + def _verify_reward(self, reward): + self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.") + + def _verify_done(self, done): + self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") + + def test_step_functionality(self): + """Tests that step environments run without errors using random actions.""" + for env_id in CUSTOM_IDS: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_step_determinism(self): + """Tests that for step environments identical seeds produce identical trajectories.""" + self._run_env_determinism(CUSTOM_IDS) + + def test_bb_functionality(self): + """Tests that black box environments run without errors using random actions.""" + for traj_gen, env_ids in alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + for id in env_ids: + with self.subTest(msg=id): + self._run_env(id) + + def test_bb_determinism(self): + """Tests that for black box environment identical seeds produce identical trajectories.""" + for traj_gen, env_ids in alr_envs.ALL_ALR_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + self._run_env_determinism(env_ids) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dmc_envs.py b/test/test_dmc.py similarity index 63% rename from 
test/test_dmc_envs.py rename to test/test_dmc.py index a90814d..4a67ecd 100644 --- a/test/test_dmc_envs.py +++ b/test/test_dmc.py @@ -5,31 +5,31 @@ import numpy as np from dm_control import suite, manipulation +import alr_envs from alr_envs import make -DMC_ENVS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] -MANIPULATION_SPECS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] +SUITE_IDS = [f'dmc:{env}-{task}' for env, task in suite.ALL_TASKS if env != "lqr"] +MANIPULATION_IDS = [f'dmc:manipulation-{task}' for task in manipulation.ALL if task.endswith('_features')] SEED = 1 -class TestStepDMCEnvironments(unittest.TestCase): +class TestDMCEnvironments(unittest.TestCase): def _run_env(self, env_id, iterations=None, seed=SEED, render=False): """ Example for running a DMC based env in the step based setting. - The env_id has to be specified as `domain_name-task_name` or + The env_id has to be specified as `dmc:domain_name-task_name` or for manipulation tasks as `manipulation-environment_name` Args: - env_id: Either `domain_name-task_name` or `manipulation-environment_name` + env_id: Either `dmc:domain_name-task_name` or `dmc:manipulation-environment_name` iterations: Number of rollout steps to run - seed= random seeding + seed: random seeding render: Render the episode - Returns: + Returns: observations, rewards, dones, actions """ - print(env_id) env: gym.Env = make(env_id, seed=seed) rewards = [] observations = [] @@ -68,6 +68,19 @@ class TestStepDMCEnvironments(unittest.TestCase): del env return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) + def _run_env_determinism(self, ids): + seed = 0 + for env_id in ids: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + def _verify_observations(self, obs, observation_space, obs_type="reset()"): self.assertTrue(observation_space.contains(obs), f"Observation {obs} received from {obs_type} " @@ -79,47 +92,39 @@ class TestStepDMCEnvironments(unittest.TestCase): def _verify_done(self, done): self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") - def test_dmc_functionality(self): - """Tests that environments runs without errors using random actions.""" - for env_id in DMC_ENVS: + def test_suite_functionality(self): + """Tests that suite step environments run without errors using random actions.""" + for env_id in SUITE_IDS: with self.subTest(msg=env_id): self._run_env(env_id) - def test_dmc_determinism(self): - """Tests that identical seeds produce identical trajectories.""" - seed = 0 - # Iterate over two trajectories, which should have the same state and action sequence - for env_id in DMC_ENVS: - with self.subTest(msg=env_id): - traj1 = self._run_env(env_id, seed=seed) - traj2 = self._run_env(env_id, seed=seed) - for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step - self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 
- ac2} is not zero.") - self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") - self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") - self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + def test_suite_determinism(self): + """Tests that for step environments identical seeds produce identical trajectories.""" + self._run_env_determinism(SUITE_IDS) def test_manipulation_functionality(self): - """Tests that environments runs without errors using random actions.""" - for env_id in MANIPULATION_SPECS: + """Tests that manipulation step environments run without errors using random actions.""" + for env_id in MANIPULATION_IDS: with self.subTest(msg=env_id): self._run_env(env_id) def test_manipulation_determinism(self): - """Tests that identical seeds produce identical trajectories.""" - seed = 0 - # Iterate over two trajectories, which should have the same state and action sequence - for env_id in MANIPULATION_SPECS: - with self.subTest(msg=env_id): - traj1 = self._run_env(env_id, seed=seed) - traj2 = self._run_env(env_id, seed=seed) - for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step - self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") - self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") - self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") - self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + """Tests that for step environments identical seeds produce identical trajectories.""" + self._run_env_determinism(MANIPULATION_IDS) + + def test_bb_functionality(self): + """Tests that black box environments run without errors using random actions.""" + for traj_gen, env_ids in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + for id in env_ids: + with self.subTest(msg=id): + self._run_env(id) + + def test_bb_determinism(self): + """Tests that for black box environment identical seeds produce identical trajectories.""" + for traj_gen, env_ids in alr_envs.ALL_DMC_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + self._run_env_determinism(env_ids) if __name__ == '__main__': diff --git a/test/test_metaworld_envs.py b/test/test_gym.py similarity index 68% rename from test/test_metaworld_envs.py rename to test/test_gym.py index ac2a013..f264c49 100644 --- a/test/test_metaworld_envs.py +++ b/test/test_gym.py @@ -3,25 +3,24 @@ import unittest import gym import numpy as np +import alr_envs from alr_envs import make -from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE -ALL_ENVS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] +METAWORLD_IDS = [] SEED = 1 -class TestStepMetaWorlEnvironments(unittest.TestCase): +class TestGymEnvironments(unittest.TestCase): def _run_env(self, env_id, iterations=None, seed=SEED, render=False): """ - Example for running a DMC based env in the step based setting. - The env_id has to be specified as `domain_name-task_name` or - for manipulation tasks as `manipulation-environment_name` + Example for running a openai gym env in the step based setting. + The env_id has to be specified as `env_id-vX`. 
Args: - env_id: Either `domain_name-task_name` or `manipulation-environment_name` + env_id: env id in the form `env_id-vX` iterations: Number of rollout steps to run - seed= random seeding + seed: random seeding render: Render the episode Returns: @@ -65,6 +64,19 @@ class TestStepMetaWorlEnvironments(unittest.TestCase): del env return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) + def _run_env_determinism(self, ids): + seed = 0 + for env_id in ids: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + def _verify_observations(self, obs, observation_space, obs_type="reset()"): self.assertTrue(observation_space.contains(obs), f"Observation {obs} received from {obs_type} " @@ -76,26 +88,29 @@ class TestStepMetaWorlEnvironments(unittest.TestCase): def _verify_done(self, done): self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") - def test_metaworld_functionality(self): - """Tests that environments runs without errors using random actions.""" - for env_id in ALL_ENVS: + def test_step_functionality(self): + """Tests that step environments run without errors using random actions.""" + for env_id in GYM_IDS: with self.subTest(msg=env_id): self._run_env(env_id) - def test_metaworld_determinism(self): - """Tests that identical seeds produce identical trajectories.""" - seed = 0 - # Iterate over two trajectories, which should have the same state and action sequence - for env_id in ALL_ENVS: - with self.subTest(msg=env_id): - traj1 = self._run_env(env_id, seed=seed) - traj2 = self._run_env(env_id, seed=seed) - for i, time_step in enumerate(zip(*traj1, *traj2)): - obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step - self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") - self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") - self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") - self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + def test_step_determinism(self): + """Tests that for step environments identical seeds produce identical trajectories.""" + self._run_env_determinism(GYM_IDS) + + def test_bb_functionality(self): + """Tests that black box environments run without errors using random actions.""" + for traj_gen, env_ids in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + for id in env_ids: + with self.subTest(msg=id): + self._run_env(id) + + def test_bb_determinism(self): + """Tests that for black box environment identical seeds produce identical trajectories.""" + for traj_gen, env_ids in alr_envs.ALL_GYM_MOTION_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + self._run_env_determinism(env_ids) if __name__ == '__main__': diff --git a/test/test_metaworld.py b/test/test_metaworld.py new file mode 100644 index 0000000..2f7af22 --- /dev/null +++ b/test/test_metaworld.py @@ -0,0 +1,119 @@ +import 
unittest + +import gym +import numpy as np + +import alr_envs +from alr_envs import make +from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE + +METAWORLD_IDS = [f'metaworld:{env.split("-goal-observable")[0]}' for env, _ in + ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE.items()] +SEED = 1 + + +class TestMetaWorldEnvironments(unittest.TestCase): + + def _run_env(self, env_id, iterations=None, seed=SEED, render=False): + """ + Example for running a metaworld based env in the step based setting. + The env_id has to be specified as `metaworld:env_id-vX`. + + Args: + env_id: env id in the form `metaworld:env_id-vX` + iterations: Number of rollout steps to run + seed: random seeding + render: Render the episode + + Returns: + + """ + env: gym.Env = make(env_id, seed=seed) + rewards = [] + observations = [] + actions = [] + dones = [] + obs = env.reset() + self._verify_observations(obs, env.observation_space, "reset()") + + iterations = iterations or (env.spec.max_episode_steps or 1) + + # number of samples(multiple environment steps) + for i in range(iterations): + observations.append(obs) + + ac = env.action_space.sample() + actions.append(ac) + # ac = np.random.uniform(env.action_space.low, env.action_space.high, env.action_space.shape) + obs, reward, done, info = env.step(ac) + + self._verify_observations(obs, env.observation_space, "step()") + self._verify_reward(reward) + self._verify_done(done) + + rewards.append(reward) + dones.append(done) + + if render: + env.render("human") + + if done: + break + + assert done, "Done flag is not True after end of episode." + observations.append(obs) + env.close() + del env + return np.array(observations), np.array(rewards), np.array(dones), np.array(actions) + + def _run_env_determinism(self, ids): + seed = 0 + for env_id in ids: + with self.subTest(msg=env_id): + traj1 = self._run_env(env_id, seed=seed) + traj2 = self._run_env(env_id, seed=seed) + for i, time_step in enumerate(zip(*traj1, *traj2)): + obs1, rwd1, done1, ac1, obs2, rwd2, done2, ac2 = time_step + self.assertTrue(np.array_equal(ac1, ac2), f"Actions [{i}] delta {ac1 - ac2} is not zero.") + self.assertTrue(np.array_equal(obs1, obs2), f"Observations [{i}] delta {obs1 - obs2} is not zero.") + self.assertEqual(rwd1, rwd2, f"Rewards [{i}] {rwd1} and {rwd2} do not match.") + self.assertEqual(done1, done2, f"Dones [{i}] {done1} and {done2} do not match.") + + def _verify_observations(self, obs, observation_space, obs_type="reset()"): + self.assertTrue(observation_space.contains(obs), + f"Observation {obs} received from {obs_type} " + f"not contained in observation space {observation_space}.") + + def _verify_reward(self, reward): + self.assertIsInstance(reward, (float, int), f"Returned type {type(reward)} as reward, expected float or int.") + + def _verify_done(self, done): + self.assertIsInstance(done, bool, f"Returned {done} as done flag, expected bool.") + + def test_step_functionality(self): + """Tests that step environments run without errors using random actions.""" + for env_id in METAWORLD_IDS: + with self.subTest(msg=env_id): + self._run_env(env_id) + + def test_step_determinism(self): + """Tests that for step environments identical seeds produce identical trajectories.""" + self._run_env_determinism(METAWORLD_IDS) + + def test_bb_functionality(self): + """Tests that black box environments run without errors using random actions.""" + for traj_gen, env_ids in alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + for id in env_ids: + with 
self.subTest(msg=id): + self._run_env(id) + + def test_bb_determinism(self): + """Tests that for black box environment identical seeds produce identical trajectories.""" + for traj_gen, env_ids in alr_envs.ALL_METAWORLD_MOVEMENT_PRIMITIVE_ENVIRONMENTS.items(): + with self.subTest(msg=traj_gen): + self._run_env_determinism(env_ids) + + +if __name__ == '__main__': + unittest.main() From d64cb614fa14ddd10dc40770f6993c0a7a6b91c4 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 12 Jul 2022 15:43:46 +0200 Subject: [PATCH 5/5] fixed seeding and tests --- alr_envs/envs/__init__.py | 20 +++++++++---------- alr_envs/envs/mujoco/__init__.py | 8 ++++---- .../mujoco/beerpong/deprecated/beerpong.py | 2 +- .../half_cheetah_jump/half_cheetah_jump.py | 2 +- .../envs/mujoco/hopper_throw/hopper_throw.py | 4 ++-- .../hopper_throw/hopper_throw_in_basket.py | 4 ++-- .../mujoco/walker_2d_jump/walker_2d_jump.py | 4 ++-- alr_envs/examples/pd_control_gain_tuning.py | 19 +++++++++--------- test/test_custom.py | 2 +- test/test_gym.py | 5 +++-- 10 files changed, 36 insertions(+), 34 deletions(-) diff --git a/alr_envs/envs/__init__.py b/alr_envs/envs/__init__.py index 2f3b713..632e1fc 100644 --- a/alr_envs/envs/__init__.py +++ b/alr_envs/envs/__init__.py @@ -154,14 +154,14 @@ register( ) register( - id='ALRAntJump-v0', + id='AntJump-v0', entry_point='alr_envs.envs.mujoco:AntJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_ANTJUMP, ) register( - id='ALRHalfCheetahJump-v0', - entry_point='alr_envs.envs.mujoco:ALRHalfCheetahJumpEnv', + id='HalfCheetahJump-v0', + entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, ) @@ -173,19 +173,19 @@ register( register( id='ALRHopperThrow-v0', - entry_point='alr_envs.envs.mujoco:ALRHopperThrowEnv', + entry_point='alr_envs.envs.mujoco:HopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, ) register( id='ALRHopperThrowInBasket-v0', - entry_point='alr_envs.envs.mujoco:ALRHopperThrowInBasketEnv', + entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, ) register( id='ALRWalker2DJump-v0', - entry_point='alr_envs.envs.mujoco:ALRWalker2dJumpEnv', + entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, ) @@ -518,7 +518,7 @@ register( # CtxtFree are v0, Contextual are v1 register( id='ALRHalfCheetahJump-v0', - entry_point='alr_envs.envs.mujoco:ALRHalfCheetahJumpEnv', + entry_point='alr_envs.envs.mujoco:HalfCheetahJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_HALFCHEETAHJUMP, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HALFCHEETAHJUMP, @@ -626,7 +626,7 @@ for i in _vs: ) register( id='ALRHopperThrow-v0', - entry_point='alr_envs.envs.mujoco:ALRHopperThrowEnv', + entry_point='alr_envs.envs.mujoco:HopperThrowEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROW, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROW, @@ -635,7 +635,7 @@ for i in _vs: ) register( id='ALRHopperThrowInBasket-v0', - entry_point='alr_envs.envs.mujoco:ALRHopperThrowInBasketEnv', + entry_point='alr_envs.envs.mujoco:HopperThrowInBasketEnv', max_episode_steps=MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, kwargs={ "max_episode_steps": MAX_EPISODE_STEPS_HOPPERTHROWINBASKET, @@ -644,7 +644,7 @@ for i in _vs: ) register( id='ALRWalker2DJump-v0', - entry_point='alr_envs.envs.mujoco:ALRWalker2dJumpEnv', + entry_point='alr_envs.envs.mujoco:Walker2dJumpEnv', max_episode_steps=MAX_EPISODE_STEPS_WALKERJUMP, kwargs={ "max_episode_steps": 
MAX_EPISODE_STEPS_WALKERJUMP, diff --git a/alr_envs/envs/mujoco/__init__.py b/alr_envs/envs/mujoco/__init__.py index c099363..1d0cf76 100644 --- a/alr_envs/envs/mujoco/__init__.py +++ b/alr_envs/envs/mujoco/__init__.py @@ -1,9 +1,9 @@ from .beerpong.beerpong import BeerPongEnv, BeerPongEnvFixedReleaseStep, BeerPongEnvStepBasedEpisodicReward from .ant_jump.ant_jump import AntJumpEnv -from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv +from .half_cheetah_jump.half_cheetah_jump import HalfCheetahJumpEnv from .hopper_jump.hopper_jump_on_box import HopperJumpOnBoxEnv -from .hopper_throw.hopper_throw import ALRHopperThrowEnv -from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv +from .hopper_throw.hopper_throw import HopperThrowEnv +from .hopper_throw.hopper_throw_in_basket import HopperThrowInBasketEnv from .reacher.reacher import ReacherEnv -from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv +from .walker_2d_jump.walker_2d_jump import Walker2dJumpEnv from .hopper_jump.hopper_jump import HopperJumpEnv diff --git a/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py index cc9a9de..9006842 100644 --- a/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py +++ b/alr_envs/envs/mujoco/beerpong/deprecated/beerpong.py @@ -155,7 +155,7 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv): return ob, reward, done, infos -# class ALRBeerBongEnvStepBased(ALRBeerBongEnv): +# class BeerBongEnvStepBased(ALRBeerBongEnv): # def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None): # super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos) # self.release_step = 62 # empirically evaluated for frame_skip=2! 
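For reference, a minimal usage sketch of the renamed environments via the registrations above. This is illustrative only and not part of the patch: it assumes that importing alr_envs registers the step-based ids shown in alr_envs/envs/__init__.py and that the environments follow the plain gym step API used elsewhere in this series.

import gym

import alr_envs  # noqa: F401  # import side effect: registers e.g. 'ALRHopperThrow-v0'

# The registered id is unchanged; only the entry point now resolves to the renamed class
# (alr_envs.envs.mujoco:HopperThrowEnv instead of ALRHopperThrowEnv).
env = gym.make('ALRHopperThrow-v0')
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()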
diff --git a/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py index 151a533..7916d0c 100644 --- a/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py +++ b/alr_envs/envs/mujoco/half_cheetah_jump/half_cheetah_jump.py @@ -8,7 +8,7 @@ import numpy as np MAX_EPISODE_STEPS_HALFCHEETAHJUMP = 100 -class ALRHalfCheetahJumpEnv(HalfCheetahEnv): +class HalfCheetahJumpEnv(HalfCheetahEnv): """ ctrl_cost_weight 0.1 -> 0.0 """ diff --git a/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py index 5630958..c2bda19 100644 --- a/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py +++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw.py @@ -7,7 +7,7 @@ import numpy as np MAX_EPISODE_STEPS_HOPPERTHROW = 250 -class ALRHopperThrowEnv(HopperEnv): +class HopperThrowEnv(HopperEnv): """ Initialization changes to normal Hopper: - healthy_reward: 1.0 -> 0.0 -> 0.1 @@ -98,7 +98,7 @@ class ALRHopperThrowEnv(HopperEnv): if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" - env = ALRHopperThrowEnv() + env = HopperThrowEnv() obs = env.reset() for i in range(2000): diff --git a/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py index 7ea9675..b1fa3ef 100644 --- a/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py +++ b/alr_envs/envs/mujoco/hopper_throw/hopper_throw_in_basket.py @@ -8,7 +8,7 @@ import numpy as np MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250 -class ALRHopperThrowInBasketEnv(HopperEnv): +class HopperThrowInBasketEnv(HopperEnv): """ Initialization changes to normal Hopper: - healthy_reward: 1.0 -> 0.0 @@ -130,7 +130,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv): if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" - env = ALRHopperThrowInBasketEnv() + env = HopperThrowInBasketEnv() obs = env.reset() for i in range(2000): diff --git a/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py index 5b143bc..76cb688 100644 --- a/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py +++ b/alr_envs/envs/mujoco/walker_2d_jump/walker_2d_jump.py @@ -12,7 +12,7 @@ MAX_EPISODE_STEPS_WALKERJUMP = 300 # as possible, while landing at a specific target position -class ALRWalker2dJumpEnv(Walker2dEnv): +class Walker2dJumpEnv(Walker2dEnv): """ healthy reward 1.0 -> 0.005 -> 0.0025 not from alex penalty 10 -> 0 not from alex @@ -95,7 +95,7 @@ class ALRWalker2dJumpEnv(Walker2dEnv): if __name__ == '__main__': render_mode = "human" # "human" or "partial" or "final" - env = ALRWalker2dJumpEnv() + env = Walker2dJumpEnv() obs = env.reset() for i in range(6000): diff --git a/alr_envs/examples/pd_control_gain_tuning.py b/alr_envs/examples/pd_control_gain_tuning.py index 79161d4..d4c5201 100644 --- a/alr_envs/examples/pd_control_gain_tuning.py +++ b/alr_envs/examples/pd_control_gain_tuning.py @@ -1,9 +1,10 @@ +from collections import OrderedDict + import numpy as np from matplotlib import pyplot as plt -from alr_envs import dmc, meta +from alr_envs import make_bb, dmc, meta from alr_envs.envs import mujoco -from alr_envs.utils.make_env_helpers import make_promp_env def visualize(env): @@ -16,11 +17,12 @@ def visualize(env): # This might work for some environments, however, please verify either way the correct trajectory information # for your environment are extracted below SEED = 1 -# env_id = "ball_in_cup-catch" 
-env_id = "ALRReacherSparse-v0" -env_id = "button-press-v2" +# env_id = "dmc:ball_in_cup-catch" +# wrappers = [dmc.suite.ball_in_cup.MPWrapper] +env_id = "Reacher5dSparse-v0" wrappers = [mujoco.reacher.MPWrapper] -wrappers = [meta.goal_object_change_mp_wrapper.MPWrapper] +# env_id = "metaworld:button-press-v2" +# wrappers = [meta.goal_object_change_mp_wrapper.MPWrapper] mp_kwargs = { "num_dof": 4, @@ -38,7 +40,7 @@ mp_kwargs = { # kwargs = dict(time_limit=4, episode_length=200) kwargs = {} -env = make_promp_env(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, **kwargs) +env = make_bb(env_id, wrappers, seed=SEED, mp_kwargs=mp_kwargs, **kwargs) env.action_space.seed(SEED) # Plot difference between real trajectory and target MP trajectory @@ -59,7 +61,7 @@ img = ax.imshow(env.env.render("rgb_array")) fig.show() for t, pos_vel in enumerate(zip(pos, vel)): - actions = env.policy.get_action(pos_vel[0], pos_vel[1],, self.current_vel, self.current_pos + actions = env.policy.get_action(pos_vel[0], pos_vel[1], env.current_vel, env.current_pos) actions = np.clip(actions, env.full_action_space.low, env.full_action_space.high) _, _, _, _ = env.env.step(actions) if t % 15 == 0: @@ -81,7 +83,6 @@ p2 = plt.plot(pos, c='C1', label="MP") # , label=["MP" if i == 0 else None for plt.xlabel("Episode steps") # plt.legend() handles, labels = plt.gca().get_legend_handles_labels() -from collections import OrderedDict by_label = OrderedDict(zip(labels, handles)) plt.legend(by_label.values(), by_label.keys()) diff --git a/test/test_custom.py b/test/test_custom.py index 37bc48d..c3d71f8 100644 --- a/test/test_custom.py +++ b/test/test_custom.py @@ -7,7 +7,7 @@ import alr_envs # noqa from alr_envs.utils.make_env_helpers import make CUSTOM_IDS = [spec.id for spec in gym.envs.registry.all() if - "alr_envs" in spec.entry_point and not 'make_bb_env_helper' in spec.entry_point] + "alr_envs" in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] SEED = 1 diff --git a/test/test_gym.py b/test/test_gym.py index f264c49..4aff70d 100644 --- a/test/test_gym.py +++ b/test/test_gym.py @@ -6,7 +6,8 @@ import numpy as np import alr_envs from alr_envs import make -METAWORLD_IDS = [] +GYM_IDS = [spec.id for spec in gym.envs.registry.all() if + "alr_envs" not in spec.entry_point and 'make_bb_env_helper' not in spec.entry_point] SEED = 1 @@ -58,7 +59,7 @@ class TestGymEnvironments(unittest.TestCase): if done: break - assert done, "Done flag is not True after end of episode." + assert done or env.spec.max_episode_steps is None, "Done flag is not True after end of episode." observations.append(obs) env.close() del env