Merge branch 'master' into fix_metaworld_rendering
This commit is contained in:
		
						commit
						3be34048b2
					
				
							
								
								
									
										24
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								README.md
									
									
									
									
									
								
							| @ -105,17 +105,16 @@ Regular step based environments added by Fancy Gym are added into the `fancy/` n | ||||
| import gymnasium as gym | ||||
| import fancy_gym | ||||
| 
 | ||||
| env = gym.make('fancy/Reacher5d-v0') | ||||
| # or env = gym.make('metaworld/reach-v2') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS | ||||
| # or env = gym.make('dm_control/ball_in_cup-catch-v0') | ||||
| # or env = gym.make('Reacher-v2') | ||||
| env = gym.make('fancy/Reacher5d-v0', render_mode='human') | ||||
| # or env = gym.make('metaworld/reach-v2', render_mode='human') # fancy_gym allows access to all metaworld ML1 tasks via the metaworld/ NS | ||||
| # or env = gym.make('dm_control/ball_in_cup-catch-v0', render_mode='human') | ||||
| # or env = gym.make('Reacher-v2', render_mode='human') | ||||
| observation = env.reset(seed=1) | ||||
| env.render() | ||||
| 
 | ||||
| for i in range(1000): | ||||
|     action = env.action_space.sample() | ||||
|     observation, reward, terminated, truncated, info = env.step(action) | ||||
|     if i % 5 == 0: | ||||
|         env.render() | ||||
| 
 | ||||
|     if terminated or truncated: | ||||
|         observation, info = env.reset() | ||||
| @ -149,17 +148,14 @@ Just keep in mind, calling `step()` executes a full trajectory. | ||||
| import gymnasium as gym | ||||
| import fancy_gym | ||||
| 
 | ||||
| env = gym.make('fancy_ProMP/Reacher5d-v0') | ||||
| # or env = gym.make('metaworld_ProDMP/reach-v2') | ||||
| # or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0') | ||||
| # or env = gym.make('gym_ProMP/Reacher-v2') # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS | ||||
| 
 | ||||
| # render() can be called once in the beginning with all necessary arguments. | ||||
| # To turn it of again just call render() without any arguments. | ||||
| env.render(mode='human') | ||||
| env = gym.make('fancy_ProMP/Reacher5d-v0', render_mode="human") | ||||
| # or env = gym.make('metaworld_ProDMP/reach-v2', render_mode="human") | ||||
| # or env = gym.make('dm_control_DMP/ball_in_cup-catch-v0', render_mode="human") | ||||
| # or env = gym.make('gym_ProMP/Reacher-v2', render_mode="human") # mp versions of envs added directly by gymnasium are in the gym_<MP-type> NS | ||||
| 
 | ||||
| # This returns the context information, not the full state observation | ||||
| observation, info = env.reset(seed=1) | ||||
| env.render() | ||||
| 
 | ||||
| for i in range(5): | ||||
|     action = env.action_space.sample() | ||||
|  | ||||
| @ -291,7 +291,7 @@ register( | ||||
| ) | ||||
| 
 | ||||
| # Air Hockey environments | ||||
| for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend"]: | ||||
| for env_mode in ["7dof-hit", "7dof-defend", "3dof-hit", "3dof-defend", "7dof-hit-airhockit2023", "7dof-defend-airhockit2023"]: | ||||
|     register( | ||||
|         id=f'fancy/AirHockey-{env_mode}-v0', | ||||
|         entry_point='fancy_gym.envs.mujoco:AirHockeyEnv', | ||||
|  | ||||
| @ -8,9 +8,9 @@ from fancy_gym.envs.mujoco.air_hockey.utils import robot_to_world | ||||
| from mushroom_rl.core import Environment | ||||
| 
 | ||||
| class AirHockeyEnv(Environment): | ||||
|     metadata = {"render_modes": ["human"], "render_fps": 50} | ||||
|     metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 50} | ||||
| 
 | ||||
|     def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, **kwargs): | ||||
|     def __init__(self, env_mode=None, interpolation_order=3, render_mode=None, width=1920, height=1080, **kwargs): | ||||
|         """ | ||||
|         Environment Constructor | ||||
| 
 | ||||
| @ -30,7 +30,10 @@ class AirHockeyEnv(Environment): | ||||
|             "7dof-defend": position.IiwaPositionDefend, | ||||
| 
 | ||||
|             "3dof-hit": position.PlanarPositionHit, | ||||
|             "3dof-defend": position.PlanarPositionDefend | ||||
|             "3dof-defend": position.PlanarPositionDefend, | ||||
| 
 | ||||
|             "7dof-hit-airhockit2023": position.IiwaPositionHitAirhocKIT2023, | ||||
|             "7dof-defend-airhockit2023": position.IiwaPositionDefendAirhocKIT2023, | ||||
|         } | ||||
| 
 | ||||
|         if env_mode not in env_dict: | ||||
| @ -39,37 +42,53 @@ class AirHockeyEnv(Environment): | ||||
|         if env_mode == "tournament" and type(interpolation_order) != tuple: | ||||
|             interpolation_order = (interpolation_order, interpolation_order) | ||||
| 
 | ||||
|         self.render_mode = render_mode | ||||
|         self.render_human_active = False | ||||
| 
 | ||||
|         # Determine headless mode based on render_mode | ||||
|         headless = self.render_mode == 'rgb_array' | ||||
|          | ||||
|         # Prepare viewer_params | ||||
|         viewer_params = kwargs.get('viewer_params', {}) | ||||
|         viewer_params.update({'headless': headless, 'width': width, 'height': height}) | ||||
|         kwargs['viewer_params'] = viewer_params | ||||
| 
 | ||||
|         self.base_env = env_dict[env_mode](interpolation_order=interpolation_order, **kwargs) | ||||
|         self.env_name = env_mode | ||||
|         self.env_info = self.base_env.env_info | ||||
|         single_robot_obs_size = len(self.base_env.info.observation_space.low) | ||||
|         if env_mode == "tournament": | ||||
|             self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64) | ||||
|         else: | ||||
|             self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64) | ||||
|         robot_info = self.env_info["robot"] | ||||
| 
 | ||||
|         if env_mode != "tournament": | ||||
|             if interpolation_order in [1, 2]: | ||||
|                 self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) | ||||
|             if interpolation_order in [3, 4, -1]: | ||||
|                 self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),  | ||||
|                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) | ||||
|             if interpolation_order in [5]: | ||||
|                 self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),  | ||||
|                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) | ||||
|         if hasattr(self.base_env, "wrapper_obs_space") and hasattr(self.base_env, "wrapper_act_space"): | ||||
|             self.observation_space = self.base_env.wrapper_obs_space | ||||
|             self.action_space = self.base_env.wrapper_act_space | ||||
|         else: | ||||
|             acts = [None, None] | ||||
|             for i in range(2): | ||||
|                 if interpolation_order[i] in [1, 2]: | ||||
|                     acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) | ||||
|                 if interpolation_order[i] in [3, 4, -1]: | ||||
|                     acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),  | ||||
|                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) | ||||
|                 if interpolation_order[i] in [5]: | ||||
|                     acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),  | ||||
|                                             high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) | ||||
|             self.action_space = spaces.Tuple((acts[0], acts[1])) | ||||
|             single_robot_obs_size = len(self.base_env.info.observation_space.low) | ||||
|             if env_mode == "tournament": | ||||
|                 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,single_robot_obs_size), dtype=np.float64) | ||||
|             else: | ||||
|                 self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(single_robot_obs_size,), dtype=np.float64) | ||||
|             robot_info = self.env_info["robot"] | ||||
| 
 | ||||
|             if env_mode != "tournament": | ||||
|                 if interpolation_order in [1, 2]: | ||||
|                     self.action_space = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) | ||||
|                 if interpolation_order in [3, 4, -1]: | ||||
|                     self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),  | ||||
|                                                 high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) | ||||
|                 if interpolation_order in [5]: | ||||
|                     self.action_space = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),  | ||||
|                                                 high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) | ||||
|             else: | ||||
|                 acts = [None, None] | ||||
|                 for i in range(2): | ||||
|                     if interpolation_order[i] in [1, 2]: | ||||
|                         acts[i] = spaces.Box(low=robot_info["joint_pos_limit"][0], high=robot_info["joint_pos_limit"][1]) | ||||
|                     if interpolation_order[i] in [3, 4, -1]: | ||||
|                         acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0]]),  | ||||
|                                                 high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1]])) | ||||
|                     if interpolation_order[i] in [5]: | ||||
|                         acts[i] = spaces.Box(low=np.vstack([robot_info["joint_pos_limit"][0], robot_info["joint_vel_limit"][0], robot_info["joint_acc_limit"][0]]),  | ||||
|                                                 high=np.vstack([robot_info["joint_pos_limit"][1], robot_info["joint_vel_limit"][1], robot_info["joint_acc_limit"][1]])) | ||||
|                 self.action_space = spaces.Tuple((acts[0], acts[1])) | ||||
| 
 | ||||
|         constraint_list = constraints.ConstraintList() | ||||
|         constraint_list.add(constraints.JointPositionConstraint(self.env_info)) | ||||
| @ -81,9 +100,6 @@ class AirHockeyEnv(Environment): | ||||
|         self.env_info['constraints'] = constraint_list | ||||
|         self.env_info['env_name'] = self.env_name | ||||
| 
 | ||||
|         self.render_mode = render_mode | ||||
|         self.render_human_active = False | ||||
| 
 | ||||
|         super().__init__(self.base_env.info) | ||||
| 
 | ||||
|     def step(self, action): | ||||
| @ -111,15 +127,21 @@ class AirHockeyEnv(Environment): | ||||
| 
 | ||||
|         if self.env_info['env_name'] == "tournament": | ||||
|             obs = np.array(np.split(obs, 2)) | ||||
|          | ||||
| 
 | ||||
|         if self.render_human_active: | ||||
|             self.base_env.render() | ||||
| 
 | ||||
|         return obs, reward, done, False, info | ||||
| 
 | ||||
|     def render(self): | ||||
|         self.render_human_active = True | ||||
| 
 | ||||
|         if self.render_mode == 'rgb_array': | ||||
|             return self.base_env.render(record = True) | ||||
|         elif self.render_mode == 'human': | ||||
|             self.render_human_active = True | ||||
|             self.base_env.render() | ||||
|         else: | ||||
|             raise ValueError(f"Unsupported render mode: '{self.render_mode}'") | ||||
|              | ||||
|     def reset(self, seed=None, options={}): | ||||
|         self.base_env.seed(seed) | ||||
|         obs = self.base_env.reset() | ||||
| @ -177,4 +199,4 @@ if __name__ == "__main__": | ||||
|             J = 0. | ||||
|             gamma = 1. | ||||
|             steps = 0 | ||||
|             env.reset() | ||||
|             env.reset() | ||||
|  | ||||
| @ -261,10 +261,14 @@ class PlanarPositionDefend(PositionControlPlanar, three_dof.AirHockeyDefend): | ||||
| class IiwaPositionHit(PositionControlIIWA, seven_dof.AirHockeyHit): | ||||
|     pass | ||||
| 
 | ||||
| class IiwaPositionHitAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyHitAirhocKIT2023): | ||||
|     pass | ||||
| 
 | ||||
| class IiwaPositionDefend(PositionControlIIWA, seven_dof.AirHockeyDefend): | ||||
|     pass | ||||
| 
 | ||||
| class IiwaPositionDefendAirhocKIT2023(PositionControlIIWA, seven_dof.AirHockeyDefendAirhocKIT2023): | ||||
|     pass | ||||
| 
 | ||||
| class IiwaPositionTournament(PositionControlIIWA, seven_dof.AirHockeyTournament): | ||||
|     pass | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| from .env_base import AirHockeyBase | ||||
| from .tournament import AirHockeyTournament | ||||
| from .hit import AirHockeyHit | ||||
| from .defend import AirHockeyDefend | ||||
| from .hit import AirHockeyHit, AirHockeyHitAirhocKIT2023 | ||||
| from .defend import AirHockeyDefend, AirHockeyDefendAirhocKIT2023 | ||||
							
								
								
									
										114
									
								
								fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								fancy_gym/envs/mujoco/air_hockey/seven_dof/airhockit_base_env.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,114 @@ | ||||
| import numpy as np | ||||
| from gymnasium import spaces | ||||
| 
 | ||||
| from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle | ||||
| from fancy_gym.envs.mujoco.air_hockey.utils import inverse_kinematics, forward_kinematics, jacobian | ||||
| 
 | ||||
| class AirhocKIT2023BaseEnv(AirHockeySingle): | ||||
|     def __init__(self, noise=False, **kwargs): | ||||
|         super().__init__(**kwargs) | ||||
|         obs_low = np.hstack([[-np.inf] * 37]) | ||||
|         obs_high = np.hstack([[np.inf] * 37]) | ||||
|         self.wrapper_obs_space = spaces.Box(low=obs_low, high=obs_high, dtype=np.float64) | ||||
|         self.wrapper_act_space = spaces.Box(low=np.repeat(-100., 6), high=np.repeat(100., 6)) | ||||
|         self.noise = noise | ||||
| 
 | ||||
|     # We don't need puck yaw observations | ||||
|     def filter_obs(self, obs): | ||||
|         obs = np.hstack([obs[0:2], obs[3:5], obs[6:12], obs[13:19], obs[20:]]) | ||||
|         return obs | ||||
| 
 | ||||
|     def add_noise(self, obs): | ||||
|         if not self.noise: | ||||
|             return | ||||
|         obs[self.env_info["puck_pos_ids"]] += np.random.normal(0, 0.001, 3) | ||||
|         obs[self.env_info["puck_vel_ids"]] += np.random.normal(0, 0.1, 3) | ||||
|      | ||||
|     def reset(self): | ||||
|         self.last_acceleration = np.repeat(0., 6) | ||||
|         obs = super().reset() | ||||
|         self.add_noise(obs) | ||||
|         self.interp_pos = obs[self.env_info["joint_pos_ids"]][:-1] | ||||
|         self.interp_vel = obs[self.env_info["joint_vel_ids"]][:-1] | ||||
| 
 | ||||
|         self.last_planned_world_pos = self._fk(self.interp_pos) | ||||
|         obs = np.hstack([ | ||||
|             obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos | ||||
|         ]) | ||||
|         return self.filter_obs(obs) | ||||
| 
 | ||||
|     def step(self, action): | ||||
|         action /= 10 | ||||
| 
 | ||||
|         new_vel = self.interp_vel + action | ||||
|          | ||||
|         jerk = 2 * (new_vel - self.interp_vel - self.last_acceleration * 0.02) / (0.02 ** 2) | ||||
|         new_pos = self.interp_pos + self.interp_vel * 0.02 + (1/2) * self.last_acceleration * (0.02 ** 2) + (1/6) * jerk * (0.02 ** 3) | ||||
|         abs_action = np.vstack([np.hstack([new_pos, 0]), np.hstack([new_vel, 0])]) | ||||
| 
 | ||||
|         self.interp_pos = new_pos | ||||
|         self.interp_vel = new_vel | ||||
|         self.last_acceleration += jerk * 0.02 | ||||
| 
 | ||||
|         obs, rew, done, info = super().step(abs_action) | ||||
|         self.add_noise(obs) | ||||
|         self.last_planned_world_pos = self._fk(self.interp_pos) | ||||
|         obs = np.hstack([ | ||||
|             obs, self.interp_pos, self.interp_vel, self.last_acceleration, self.last_planned_world_pos | ||||
|         ]) | ||||
|          | ||||
|         fatal_rew = self.check_fatal(obs) | ||||
|         if fatal_rew != 0: | ||||
|             return self.filter_obs(obs), fatal_rew, True, info | ||||
|          | ||||
|         return self.filter_obs(obs), rew, done, info | ||||
|      | ||||
|     def check_constraints(self, constraint_values): | ||||
|         fatal_rew = 0 | ||||
| 
 | ||||
|         j_pos_constr = constraint_values["joint_pos_constr"] | ||||
|         if j_pos_constr.max() > 0: | ||||
|             fatal_rew += j_pos_constr.max() | ||||
| 
 | ||||
|         j_vel_constr = constraint_values["joint_vel_constr"] | ||||
|         if j_vel_constr.max() > 0: | ||||
|             fatal_rew += j_vel_constr.max() | ||||
| 
 | ||||
|         ee_constr = constraint_values["ee_constr"] | ||||
|         if ee_constr.max() > 0: | ||||
|             fatal_rew += ee_constr.max() | ||||
| 
 | ||||
|         link_constr = constraint_values["link_constr"] | ||||
|         if link_constr.max() > 0: | ||||
|             fatal_rew += link_constr.max() | ||||
|          | ||||
|         return -fatal_rew | ||||
| 
 | ||||
|     def check_fatal(self, obs): | ||||
|         fatal_rew = 0 | ||||
|          | ||||
|         q = obs[self.env_info["joint_pos_ids"]] | ||||
|         qd = obs[self.env_info["joint_vel_ids"]] | ||||
|         constraint_values_obs = self.env_info["constraints"].fun(q, qd) | ||||
|         fatal_rew += self.check_constraints(constraint_values_obs) | ||||
| 
 | ||||
|         return -fatal_rew | ||||
| 
 | ||||
|     def _fk(self, pos): | ||||
|         res, _ = forward_kinematics(self.env_info["robot"]["robot_model"], | ||||
|                                     self.env_info["robot"]["robot_data"], pos) | ||||
|         return res.astype(np.float32) | ||||
| 
 | ||||
|     def _ik(self, world_pos, init_q=None): | ||||
|         success, pos = inverse_kinematics(self.env_info["robot"]["robot_model"], | ||||
|                                           self.env_info["robot"]["robot_data"], | ||||
|                                           world_pos, | ||||
|                                           initial_q=init_q) | ||||
|         pos = pos.astype(np.float32) | ||||
|         assert success | ||||
|         return pos | ||||
| 
 | ||||
|     def _jacobian(self, pos): | ||||
|         return jacobian(self.env_info["robot"]["robot_model"], | ||||
|                         self.env_info["robot"]["robot_data"], | ||||
|                         pos).astype(np.float32) | ||||
| @ -1,6 +1,7 @@ | ||||
| import numpy as np | ||||
| 
 | ||||
| from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle | ||||
| from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv | ||||
| 
 | ||||
| 
 | ||||
| class AirHockeyDefend(AirHockeySingle): | ||||
| @ -10,9 +11,7 @@ class AirHockeyDefend(AirHockeySingle): | ||||
|     """ | ||||
|     def __init__(self, gamma=0.99, horizon=500, viewer_params={}): | ||||
|         self.init_velocity_range = (1, 3) | ||||
| 
 | ||||
|         self.start_range = np.array([[0.29, 0.65], [-0.4, 0.4]])  # Table Frame | ||||
|         self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame | ||||
|         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) | ||||
| 
 | ||||
|     def setup(self, obs): | ||||
| @ -32,7 +31,7 @@ class AirHockeyDefend(AirHockeySingle): | ||||
|         self._write_data("puck_y_vel", puck_vel[1]) | ||||
|         self._write_data("puck_yaw_vel", puck_vel[2]) | ||||
| 
 | ||||
|         super(AirHockeyDefend, self).setup(obs) | ||||
|         super().setup(obs) | ||||
| 
 | ||||
|     def reward(self, state, action, next_state, absorbing): | ||||
|         return 0 | ||||
| @ -46,6 +45,98 @@ class AirHockeyDefend(AirHockeySingle): | ||||
|             return True | ||||
|         return super().is_absorbing(state) | ||||
| 
 | ||||
| class AirHockeyDefendAirhocKIT2023(AirhocKIT2023BaseEnv): | ||||
|     def __init__(self, gamma=0.99, horizon=200, viewer_params={}, **kwargs): | ||||
|         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs) | ||||
|         self.init_velocity_range = (1, 3) | ||||
|         self.start_range = np.array([[0.4, 0.75], [-0.4, 0.4]])  # Table Frame | ||||
|         self._setup_metrics() | ||||
|          | ||||
|     def setup(self, obs): | ||||
|         self._setup_metrics() | ||||
|         puck_pos = np.random.rand(2) * (self.start_range[:, 1] - self.start_range[:, 0]) + self.start_range[:, 0] | ||||
| 
 | ||||
|         lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) | ||||
|         angle = np.random.uniform(-0.5, 0.5) | ||||
| 
 | ||||
|         puck_vel = np.zeros(3) | ||||
|         puck_vel[0] = -np.cos(angle) * lin_vel | ||||
|         puck_vel[1] = np.sin(angle) * lin_vel | ||||
|         puck_vel[2] = np.random.uniform(-10, 10) | ||||
| 
 | ||||
|         self._write_data("puck_x_pos", puck_pos[0]) | ||||
|         self._write_data("puck_y_pos", puck_pos[1]) | ||||
|         self._write_data("puck_x_vel", puck_vel[0]) | ||||
|         self._write_data("puck_y_vel", puck_vel[1]) | ||||
|         self._write_data("puck_yaw_vel", puck_vel[2]) | ||||
| 
 | ||||
|         super().setup(obs) | ||||
| 
 | ||||
|     def reset(self, *args): | ||||
|         obs = super().reset() | ||||
|         self.hit_step_flag = False | ||||
|         self.hit_step = False | ||||
|         self.received_hit_reward = False | ||||
|         self.give_reward_next = False | ||||
|         return obs | ||||
| 
 | ||||
|     def _setup_metrics(self): | ||||
|         self.episode_steps = 0 | ||||
|         self.has_hit = False | ||||
| 
 | ||||
|     def _simulation_post_step(self): | ||||
|         if not self.has_hit: | ||||
|             self.has_hit = self._check_collision("puck", "robot_1/ee") | ||||
| 
 | ||||
|         super()._simulation_post_step() | ||||
| 
 | ||||
|     def _step_finalize(self): | ||||
|         self.episode_steps += 1 | ||||
|         return super()._step_finalize() | ||||
|      | ||||
|     def reward(self, state, action, next_state, absorbing): | ||||
|         puck_pos, puck_vel = self.get_puck(next_state) | ||||
|         ee_pos, _ = self.get_ee() | ||||
|         rew = 0.01 | ||||
|         if -0.7 < puck_pos[0] <= -0.2 and np.linalg.norm(puck_vel[:2]) < 0.1: | ||||
|             assert absorbing | ||||
|             rew += 70 | ||||
| 
 | ||||
|         if self.has_hit and not self.hit_step_flag: | ||||
|             self.hit_step_flag = True | ||||
|             self.hit_step = True | ||||
|         else: | ||||
|             self.hit_step = False | ||||
| 
 | ||||
|         f = lambda puck_vel: 30 + 100 * (100 ** (-0.25 * np.linalg.norm(puck_vel[:2]))) | ||||
|         if not self.give_reward_next and not self.received_hit_reward and self.hit_step and ee_pos[0] < puck_pos[0]: | ||||
|             self.hit_this_step = True | ||||
|             if np.linalg.norm(puck_vel[:2]) < 0.1: | ||||
|                 return rew + f(puck_vel) | ||||
|             self.give_reward_next = True | ||||
|             return rew | ||||
| 
 | ||||
|         if not self.received_hit_reward and self.give_reward_next: | ||||
|             self.received_hit_reward = True | ||||
|             if puck_vel[0] >= -0.2: | ||||
|                 return rew + f(puck_vel) | ||||
|             return rew | ||||
|         else: | ||||
|             return rew | ||||
| 
 | ||||
|     def is_absorbing(self, obs): | ||||
|         puck_pos, puck_vel = self.get_puck(obs) | ||||
|         # If puck is over the middle line and moving towards opponent | ||||
|         if puck_pos[0] > 0 and puck_vel[0] > 0: | ||||
|             return True | ||||
| 
 | ||||
|         if self.episode_steps == self._mdp_info.horizon: | ||||
|             return True | ||||
|              | ||||
|         if np.linalg.norm(puck_vel[:2]) < 0.1: | ||||
|             return True | ||||
|         return super().is_absorbing(obs) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     env = AirHockeyDefend() | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| import numpy as np | ||||
| 
 | ||||
| from fancy_gym.envs.mujoco.air_hockey.seven_dof.env_single import AirHockeySingle | ||||
| from fancy_gym.envs.mujoco.air_hockey.seven_dof.airhockit_base_env import AirhocKIT2023BaseEnv | ||||
| 
 | ||||
| 
 | ||||
| class AirHockeyHit(AirHockeySingle): | ||||
| @ -14,9 +15,6 @@ class AirHockeyHit(AirHockeySingle): | ||||
|                 opponent_agent(Agent, None): Agent which controls the opponent | ||||
|                 moving_init(bool, False): If true, initialize the puck with inital velocity. | ||||
|         """ | ||||
|         self.hit_range = np.array([[-0.65, -0.25], [-0.4, 0.4]])  # Table Frame | ||||
|         self.init_velocity_range = (0, 0.5)  # Table Frame | ||||
| 
 | ||||
|         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params) | ||||
| 
 | ||||
|         self.moving_init = moving_init | ||||
| @ -58,6 +56,93 @@ class AirHockeyHit(AirHockeySingle): | ||||
|             return True | ||||
|         return super(AirHockeyHit, self).is_absorbing(obs) | ||||
| 
 | ||||
| class AirHockeyHitAirhocKIT2023(AirhocKIT2023BaseEnv): | ||||
|     def __init__(self, gamma=0.99, horizon=500, moving_init=True, viewer_params={}, **kwargs): | ||||
|         super().__init__(gamma=gamma, horizon=horizon, viewer_params=viewer_params, **kwargs) | ||||
| 
 | ||||
|         self.moving_init = moving_init | ||||
|         hit_width = self.env_info['table']['width'] / 2 - self.env_info['puck']['radius'] - \ | ||||
|                     self.env_info['mallet']['radius'] * 2 | ||||
|         self.hit_range = np.array([[-0.7, -0.2], [-hit_width, hit_width]])  # Table Frame | ||||
|         self.init_velocity_range = (0, 0.5)  # Table Frame | ||||
|         self.init_ee_range = np.array([[0.60, 1.25], [-0.4, 0.4]])  # Robot Frame | ||||
|         self._setup_metrics() | ||||
| 
 | ||||
|     def reset(self, *args): | ||||
|         obs = super().reset() | ||||
|         self.last_ee_pos = self.last_planned_world_pos.copy() | ||||
|         self.last_ee_pos[0] -= 1.51 | ||||
|         return obs | ||||
| 
 | ||||
|     def setup(self, obs): | ||||
|         self._setup_metrics() | ||||
|         puck_pos = np.random.rand(2) * (self.hit_range[:, 1] - self.hit_range[:, 0]) + self.hit_range[:, 0] | ||||
| 
 | ||||
|         self._write_data("puck_x_pos", puck_pos[0]) | ||||
|         self._write_data("puck_y_pos", puck_pos[1]) | ||||
| 
 | ||||
|         if self.moving_init: | ||||
|             lin_vel = np.random.uniform(self.init_velocity_range[0], self.init_velocity_range[1]) | ||||
|             angle = np.random.uniform(-np.pi / 2 - 0.1, np.pi / 2 + 0.1) | ||||
|             puck_vel = np.zeros(3) | ||||
|             puck_vel[0] = -np.cos(angle) * lin_vel | ||||
|             puck_vel[1] = np.sin(angle) * lin_vel | ||||
|             puck_vel[2] = np.random.uniform(-2, 2) | ||||
| 
 | ||||
|             self._write_data("puck_x_vel", puck_vel[0]) | ||||
|             self._write_data("puck_y_vel", puck_vel[1]) | ||||
|             self._write_data("puck_yaw_vel", puck_vel[2]) | ||||
| 
 | ||||
|         super().setup(obs) | ||||
| 
 | ||||
|     def _setup_metrics(self): | ||||
|         self.episode_steps = 0 | ||||
|         self.has_scored = False | ||||
| 
 | ||||
|     def _step_finalize(self): | ||||
|         cur_obs = self._create_observation(self.obs_helper._build_obs(self._data)) | ||||
|         puck_pos, _ = self.get_puck(cur_obs)  # world frame [x, y, z] and [x', y', z'] | ||||
| 
 | ||||
|         if not self.has_scored: | ||||
|             boundary = np.array([self.env_info['table']['length'], self.env_info['table']['width']]) / 2 | ||||
|             self.has_scored = np.any(np.abs(puck_pos[:2]) > boundary) and puck_pos[0] > 0 | ||||
| 
 | ||||
|         self.episode_steps += 1 | ||||
|         return super()._step_finalize() | ||||
|      | ||||
|     def reward(self, state, action, next_state, absorbing): | ||||
|         rew = 0 | ||||
|         puck_pos, puck_vel = self.get_puck(next_state) | ||||
|         ee_pos, _ = self.get_ee() | ||||
|         ee_vel = (ee_pos - self.last_ee_pos) / 0.02 | ||||
|         self.last_ee_pos = ee_pos | ||||
| 
 | ||||
|         if puck_vel[0] < 0.25 and puck_pos[0] < 0: | ||||
|             ee_puck_dir = (puck_pos - ee_pos)[:2] | ||||
|             ee_puck_dir = ee_puck_dir / np.linalg.norm(ee_puck_dir) | ||||
|             rew += 1 * max(0, np.dot(ee_puck_dir, ee_vel[:2])) | ||||
|         else: | ||||
|             rew += 10 * np.linalg.norm(puck_vel[:2]) | ||||
| 
 | ||||
|         if self.has_scored: | ||||
|             rew += 2000 + 5000 * np.linalg.norm(puck_vel[:2]) | ||||
|          | ||||
|         return rew | ||||
| 
 | ||||
|     def is_absorbing(self, obs): | ||||
|         puck_pos, puck_vel = self.get_puck(obs) | ||||
|         # Stop if the puck bounces back on the opponents wall | ||||
|         if puck_pos[0] > 0 and puck_vel[0] < 0: | ||||
|             return True | ||||
| 
 | ||||
|         if self.has_scored: | ||||
|             return True | ||||
| 
 | ||||
|         if self.episode_steps == self._mdp_info.horizon: | ||||
|             return True | ||||
| 
 | ||||
|         return super().is_absorbing(obs) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     env = AirHockeyHit(moving_init=True) | ||||
|  | ||||
| @ -0,0 +1 @@ | ||||
| # TODO | ||||
| @ -6,21 +6,21 @@ def example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", | ||||
|     env = gym.make(env_name) | ||||
|     env.reset(seed=seed) | ||||
|     for i in range(iterations): | ||||
|         done = False | ||||
|         while done is False: | ||||
|         while True: | ||||
|             ac = env.action_space.sample() | ||||
|             obs, reward, terminated, truncated, info = env.step(ac) | ||||
|             if render: | ||||
|                 env.render(mode="human") | ||||
|             if terminated or truncated: | ||||
|                 env.reset() | ||||
|                 break | ||||
|     env.close() | ||||
|     del env | ||||
| 
 | ||||
| 
 | ||||
| def example_custom_replanning_envs(seed=0, iteration=100, render=True): | ||||
|     # id for a step-based environment | ||||
|     base_env_id = "BoxPushingDense-v0" | ||||
|     base_env_id = "fancy/BoxPushingDense-v0" | ||||
| 
 | ||||
|     wrappers = [fancy_gym.envs.mujoco.box_pushing.mp_wrapper.MPWrapper] | ||||
| 
 | ||||
| @ -38,7 +38,8 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): | ||||
|                         'replanning_schedule': lambda pos, vel, obs, action, t: t % 25 == 0, | ||||
|                         'condition_on_desired': True} | ||||
| 
 | ||||
|     env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs=black_box_kwargs, | ||||
|     base_env = gym.make(base_env_id) | ||||
|     env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs=black_box_kwargs, | ||||
|                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, | ||||
|                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, | ||||
|                             seed=seed) | ||||
| @ -56,10 +57,12 @@ def example_custom_replanning_envs(seed=0, iteration=100, render=True): | ||||
|     env.close() | ||||
|     del env | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| def main(render=False): | ||||
|     # run a registered replanning environment | ||||
|     example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=False) | ||||
|     example_run_replanning_env(env_name="fancy_ProDMP/BoxPushingDenseReplan-v0", seed=1, iterations=1, render=render) | ||||
| 
 | ||||
|     # run a custom replanning environment | ||||
|     example_custom_replanning_envs(seed=0, iteration=8, render=True) | ||||
|     example_custom_replanning_envs(seed=0, iteration=8, render=render) | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
| @ -84,7 +84,8 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): | ||||
|     # basis_generator_kwargs = {'basis_generator_type': 'rbf', | ||||
|     #                           'num_basis': 5 | ||||
|     #                           } | ||||
|     env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={}, | ||||
|     base_env = gym.make(base_env_id) | ||||
|     env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, | ||||
|                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, | ||||
|                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, | ||||
|                             seed=seed) | ||||
| @ -114,21 +115,13 @@ def example_custom_dmc_and_mp(seed=1, iterations=1, render=True): | ||||
|     env.close() | ||||
|     del env | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     # Disclaimer: DMC environments require the seed to be specified in the beginning. | ||||
|     # Adjusting it afterwards with env.seed() is not recommended as it does not affect the underlying physics. | ||||
| 
 | ||||
|     # For rendering DMC | ||||
|     # export MUJOCO_GL="osmesa" | ||||
|     render = True | ||||
| 
 | ||||
| def main(render = True): | ||||
|     # # Standard DMC Suite tasks | ||||
|     example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) | ||||
|     # | ||||
|     # # Manipulation tasks | ||||
|     # # Disclaimer: The vision versions are currently not integrated and yield an error | ||||
|     example_dmc("dm_control/manipulation-reach_site_features", seed=10, iterations=250, render=render) | ||||
|     example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render) | ||||
|     # | ||||
|     # # Gym + DMC hybrid task provided in the MP framework | ||||
|     example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render) | ||||
| @ -136,3 +129,20 @@ if __name__ == '__main__': | ||||
|     # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is | ||||
|     # already registered above | ||||
|     example_custom_dmc_and_mp(seed=11, iterations=1, render=render) | ||||
| 
 | ||||
|     # # Standard DMC Suite tasks | ||||
|     example_dmc("dm_control/fish-swim", seed=10, iterations=1000, render=render) | ||||
|     # | ||||
|     # # Manipulation tasks | ||||
|     # # Disclaimer: The vision versions are currently not integrated and yield an error | ||||
|     example_dmc("dm_control/reach_site_features", seed=10, iterations=250, render=render) | ||||
|     # | ||||
|     # # Gym + DMC hybrid task provided in the MP framework | ||||
|     example_dmc("dm_control_ProMP/ball_in_cup-catch-v0", seed=10, iterations=1, render=render) | ||||
| 
 | ||||
|     # Custom DMC task # Different seed, because the episode is longer for this example and the name+seed combo is | ||||
|     # already registered above | ||||
|     example_custom_dmc_and_mp(seed=11, iterations=1, render=render) | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
| @ -85,10 +85,7 @@ def example_async(env_id="fancy/HoleReacher-v0", n_cpu=4, seed=int('533D', 16), | ||||
|     # do not return values above threshold | ||||
|     return *map(lambda v: np.stack(v)[:n_samples], buffer.values()), | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     render = True | ||||
| 
 | ||||
| def main(render = True): | ||||
|     # Basic gym task | ||||
|     example_general("Pendulum-v1", seed=10, iterations=200, render=render) | ||||
| 
 | ||||
| @ -100,3 +97,6 @@ if __name__ == '__main__': | ||||
| 
 | ||||
|     # Vectorized multiprocessing environments | ||||
|     # example_async(env_id="HoleReacher-v0", n_cpu=2, seed=int('533D', 16), n_samples=2 * 200) | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
| @ -35,7 +35,7 @@ def example_meta(env_id="fish-swim", seed=1, iterations=1000, render=True): | ||||
|         if terminated or truncated: | ||||
|             print(env_id, rewards) | ||||
|             rewards = 0 | ||||
|             obs = env.reset() | ||||
|             obs = env.reset(seed=seed+i+1) | ||||
| 
 | ||||
|     env.close() | ||||
|     del env | ||||
| @ -81,7 +81,8 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True): | ||||
|     basis_generator_kwargs = {'basis_generator_type': 'rbf', | ||||
|                               'num_basis': 5 | ||||
|                               } | ||||
|     env = fancy_gym.make_bb(env_id=base_env_id, wrappers=wrappers, black_box_kwargs={}, | ||||
|     base_env = gym.make(base_env_id) | ||||
|     env = fancy_gym.make_bb(env=base_env, wrappers=wrappers, black_box_kwargs={}, | ||||
|                             traj_gen_kwargs=trajectory_generator_kwargs, controller_kwargs=controller_kwargs, | ||||
|                             phase_kwargs=phase_generator_kwargs, basis_kwargs=basis_generator_kwargs, | ||||
|                             seed=seed) | ||||
| @ -92,14 +93,10 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True): | ||||
|     # It is also possible to change them mode multiple times when | ||||
|     # e.g. only every nth trajectory should be displayed. | ||||
|     if render: | ||||
|         raise ValueError("Metaworld render interface bug does not allow to render() fixes its interface. " | ||||
|                          "A temporary workaround is to alter their code in MujocoEnv render() from " | ||||
|                          "`if not offscreen` to `if not offscreen or offscreen == 'human'`.") | ||||
|         # TODO: Remove this, when Metaworld fixes its interface. | ||||
|         # env.render(mode="human") | ||||
|         env.render(mode="human") | ||||
| 
 | ||||
|     rewards = 0 | ||||
|     obs = env.reset() | ||||
|     obs = env.reset(seed=seed) | ||||
| 
 | ||||
|     # number of samples/full trajectories (multiple environment steps) | ||||
|     for i in range(iterations): | ||||
| @ -110,25 +107,23 @@ def example_custom_meta_and_mp(seed=1, iterations=1, render=True): | ||||
|         if terminated or truncated: | ||||
|             print(base_env_id, rewards) | ||||
|             rewards = 0 | ||||
|             obs = env.reset() | ||||
|             obs = env.reset(seed=seed+i+1) | ||||
| 
 | ||||
|     env.close() | ||||
|     del env | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     # Disclaimer: MetaWorld environments require the seed to be specified in the beginning. | ||||
|     # Adjusting it afterwards with env.seed() is not recommended as it may not affect the underlying behavior. | ||||
| 
 | ||||
| def main(render = False): | ||||
|     # For rendering it might be necessary to specify your OpenGL installation | ||||
|     # export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so | ||||
|     render = False | ||||
| 
 | ||||
|     # # Standard Meta world tasks | ||||
|     example_meta("metaworld/button-press-v2", seed=10, iterations=500, render=render) | ||||
| 
 | ||||
|     # # MP + MetaWorld hybrid task provided in the our framework | ||||
|     example_meta("metaworld_ProMP/ButtonPress-v2", seed=10, iterations=1, render=render) | ||||
|     example_meta("metaworld_ProMP/button-press-v2", seed=10, iterations=1, render=render) | ||||
|     # | ||||
|     # # Custom MetaWorld task | ||||
|     example_custom_meta_and_mp(seed=10, iterations=1, render=render) | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
| @ -26,6 +26,8 @@ def example_mp(env_name="fancy_ProMP/HoleReacher-v0", seed=1, iterations=1, rend | ||||
|     for i in range(iterations): | ||||
| 
 | ||||
|         if render and i % 1 == 0: | ||||
|             # This renders the full MP trajectory | ||||
|             # It is only required to call render() once in the beginning, which renders every consecutive trajectory. | ||||
|             env.render() | ||||
| 
 | ||||
|         # Now the action space is not the raw action but the parametrization of the trajectory generator, | ||||
| @ -248,8 +250,7 @@ def example_fully_custom_mp_alternative(seed=1, iterations=1, render=True): | ||||
|         pass | ||||
| 
 | ||||
| 
 | ||||
| def main(): | ||||
|     render = False | ||||
| def main(render=False): | ||||
|     # DMP | ||||
|     example_mp("fancy_DMP/HoleReacher-v0", seed=10, iterations=5, render=render) | ||||
| 
 | ||||
|  | ||||
| @ -31,6 +31,8 @@ def example_mp(env_name, seed=1, render=True): | ||||
|             print(returns) | ||||
|             obs = env.reset() | ||||
| 
 | ||||
| def main(render=True): | ||||
|     example_mp("gym_ProMP/Reacher-v2", render=render) | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     example_mp("gym_ProMP/Reacher-v2") | ||||
|     main() | ||||
							
								
								
									
										13
									
								
								test/test_examples.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								test/test_examples.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| import pytest | ||||
| 
 | ||||
| from fancy_gym.examples.example_replanning_envs import main as replanning_envs_main | ||||
| from fancy_gym.examples.examples_dmc import main as dmc_main | ||||
| from fancy_gym.examples.examples_general import main as general_main | ||||
| from fancy_gym.examples.examples_metaworld import main as metaworld_main | ||||
| from fancy_gym.examples.examples_movement_primitives import main as mp_main | ||||
| from fancy_gym.examples.examples_open_ai import main as open_ai_main | ||||
| 
 | ||||
| @pytest.mark.parametrize('entry', [replanning_envs_main, dmc_main, general_main, metaworld_main, mp_main, open_ai_main]) | ||||
| @pytest.mark.parametrize('render', [False]) | ||||
| def test_run_example(entry, render): | ||||
|     entry(render=render) | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user