From 615ba45dde8128a14aebecbbd190f339450b85c4 Mon Sep 17 00:00:00 2001 From: kngwyu Date: Mon, 28 Sep 2020 20:05:08 +0900 Subject: [PATCH] Add manuall collision detection for billiard --- mujoco_maze/__init__.py | 2 +- mujoco_maze/maze_env.py | 36 +++++++++++++++++++++++++++--------- mujoco_maze/maze_task.py | 8 ++++---- tests/test_envs.py | 6 +++--- 4 files changed, 35 insertions(+), 17 deletions(-) diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py index 2871f1e..b468a1c 100644 --- a/mujoco_maze/__init__.py +++ b/mujoco_maze/__init__.py @@ -29,7 +29,7 @@ for maze_id in TaskRegistry.keys(): max_episode_steps=1000, reward_threshold=task_cls.REWARD_THRESHOLD, ) - if "Rolling" in maze_id: + if "Billiard" in maze_id: continue # Ant gym.envs.register( diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py index 7c023ef..01586a3 100644 --- a/mujoco_maze/maze_env.py +++ b/mujoco_maze/maze_env.py @@ -72,6 +72,9 @@ class MazeEnv(gym.Env): self._collision = maze_env_utils.CollisionDetector( structure, size_scaling, torso_x, torso_y, model_cls.RADIUS, ) + self._objball_collision = maze_env_utils.CollisionDetector( + structure, size_scaling, torso_x, torso_y, 0.8, + ) else: self._collision = None @@ -360,12 +363,19 @@ class MazeEnv(gym.Env): coords.append((j * size_scaling, i * size_scaling)) return coords - def step(self, action) -> Tuple[np.ndarray, float, bool, dict]: + def _objball_positions(self) -> None: + return [ + self.wrapped_env.get_body_com(name)[:2].copy() for name in self.object_balls + ] + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: self.t += 1 if self.wrapped_env.MANUAL_COLLISION: old_pos = self.wrapped_env.get_xy() + old_objballs = self._objball_positions() inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action) new_pos = self.wrapped_env.get_xy() + new_objballs = self._objball_positions() # Checks that the new_position is in the wall collision = self._collision.detect(old_pos, new_pos) if collision is not None: @@ -375,6 +385,14 @@ class MazeEnv(gym.Env): self.wrapped_env.set_xy(old_pos) else: self.wrapped_env.set_xy(pos) + for name, old, new in zip(self.object_balls, old_objballs, new_objballs): + collision = self._objball_collision.detect(old, new) + if collision is not None: + pos = collision.point + self._restitution_coef * collision.rest() + if self._objball_collision.detect(old, pos) is not None: + pos = old + idx = self.wrapped_env.model.body_name2id(name) + self.wrapped_env.data.xipos[idx][:2] = pos else: inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action) next_obs = self._get_obs() @@ -394,19 +412,19 @@ def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) body, "geom", type="sphere", - name=f"objball_geom_{i}_{j}", - size="1", - pos="0.0 0.0 0.5", + name=f"objball_{i}_{j}_geom", + size="1.0", # Radius + pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!! rgba="0.1 0.1 0.7 1", contype="1", conaffinity="1", - mass="0.00004", - solimp="0.9 0.99 0.001" + solimp="0.9 0.99 0.001", + mass="0.0001", ) ET.SubElement( body, "joint", - name=f"objball_x_{i}_{j}", + name=f"objball_{i}_{j}_x", axis="1 0 0", pos="0 0 0.0", type="slide", @@ -414,7 +432,7 @@ def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) ET.SubElement( body, "joint", - name=f"objball_y_{i}_{j}", + name=f"objball_{i}_{j}_y", axis="0 1 0", pos="0 0 0", type="slide", @@ -422,7 +440,7 @@ def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) ET.SubElement( body, "joint", - name=f"objball_rot_{i}_{j}", + name=f"objball_{i}_{j}_rot", axis="0 0 1", pos="0 0 0", type="hinge", diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py index c18a219..eecd3b2 100644 --- a/mujoco_maze/maze_task.py +++ b/mujoco_maze/maze_task.py @@ -27,7 +27,7 @@ class MazeGoal: reward_scale: float = 1.0, rgb: Rgb = RED, threshold: float = 0.6, - custom_size: Optional[float] = None + custom_size: Optional[float] = None, ) -> None: assert 0.0 <= reward_scale <= 1.0 self.pos = pos @@ -330,7 +330,7 @@ class DistRewardBlockMaze(GoalRewardBlockMaze, DistRewardMixIn): pass -class GoalRewardRolling(MazeTask): +class GoalRewardBilliard(MazeTask): REWARD_THRESHOLD: float = 0.9 PENALTY: float = -0.0001 MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0) @@ -360,7 +360,7 @@ class GoalRewardRolling(MazeTask): ] -class DistRewardRolling(GoalRewardRolling): +class DistRewardBilliard(GoalRewardBilliard): def reward(self, obs: np.ndarray) -> float: return -self.goals[0].euc_dist(obs[3:6]) / self.scale @@ -375,7 +375,7 @@ class TaskRegistry: "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms], "TRoom": [DistRewardTRoom, GoalRewardTRoom], "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze], - "Rolling": [DistRewardRolling, GoalRewardRolling], + "Billiard": [DistRewardBilliard, GoalRewardBilliard], } @staticmethod diff --git a/tests/test_envs.py b/tests/test_envs.py index f3b7eab..d769fd8 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -6,7 +6,7 @@ import mujoco_maze @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) def test_ant_maze(maze_id): - if "Rolling" in maze_id: + if "Billiard" in maze_id: return for i in range(2): env = gym.make(f"Ant{maze_id}-v{i}") @@ -38,7 +38,7 @@ def test_point_maze(maze_id): @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) def test_reacher_maze(maze_id): - for inhibited in ["Fall", "Push", "Block", "Rolling"]: + for inhibited in ["Fall", "Push", "Block", "Billiard"]: if inhibited in maze_id: return for i in range(2): @@ -52,7 +52,7 @@ def test_reacher_maze(maze_id): @pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys()) def test_swimmer_maze(maze_id): - for inhibited in ["Fall", "Push", "Block", "Rolling"]: + for inhibited in ["Fall", "Push", "Block", "Billiard"]: if inhibited in maze_id: return for i in range(2):