From ac8755b4c1f2d82de8d474b5b08e00660a83c9c5 Mon Sep 17 00:00:00 2001 From: kngwyu Date: Tue, 29 Sep 2020 00:47:19 +0900 Subject: [PATCH] Make the ball size configurable at task level --- mujoco_maze/maze_env.py | 19 ++++++++++++------- mujoco_maze/maze_task.py | 27 +++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py index 01586a3..0ed16a0 100644 --- a/mujoco_maze/maze_env.py +++ b/mujoco_maze/maze_env.py @@ -72,8 +72,9 @@ class MazeEnv(gym.Env): self._collision = maze_env_utils.CollisionDetector( structure, size_scaling, torso_x, torso_y, model_cls.RADIUS, ) + # Now all object balls have size=1.0 self._objball_collision = maze_env_utils.CollisionDetector( - structure, size_scaling, torso_x, torso_y, 0.8, + structure, size_scaling, torso_x, torso_y, self._task.OBJECT_BALL_SIZE, ) else: self._collision = None @@ -145,7 +146,7 @@ class MazeEnv(gym.Env): elif struct.is_object_ball(): # Movable Ball self.object_balls.append(f"objball_{i}_{j}") - _add_object_ball(worldbody, i, j, x, y) + _add_object_ball(worldbody, i, j, x, y, self._task.OBJECT_BALL_SIZE) torso = tree.find(".//body[@name='torso']") geoms = torso.findall(".//geom") @@ -385,6 +386,7 @@ class MazeEnv(gym.Env): self.wrapped_env.set_xy(old_pos) else: self.wrapped_env.set_xy(pos) + # Do the same check for object balls for name, old, new in zip(self.object_balls, old_objballs, new_objballs): collision = self._objball_collision.detect(old, new) if collision is not None: @@ -406,20 +408,23 @@ class MazeEnv(gym.Env): self.wrapped_env.close() -def _add_object_ball(worldbody: ET.Element, i: str, j: str, x: float, y: float) -> None: - body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0",) +def _add_object_ball( + worldbody: ET.Element, i: str, j: str, x: float, y: float, size: float +) -> None: + body = ET.SubElement(worldbody, "body", name=f"objball_{i}_{j}", pos=f"{x} {y} 0") + mass = 0.0001 * (size ** 3) ET.SubElement( body, "geom", type="sphere", name=f"objball_{i}_{j}_geom", - size="1.0", # Radius - pos="0.0 0.0 1.0", # Z = 1.0 so that this ball can move!! + size=f"{size}", # Radius + pos=f"0.0 0.0 {size}", # Z = size so that this ball can move!! rgba="0.1 0.1 0.7 1", contype="1", conaffinity="1", solimp="0.9 0.99 0.001", - mass="0.0001", + mass=f"{mass}", ) ET.SubElement( body, diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py index eecd3b2..002e943 100644 --- a/mujoco_maze/maze_task.py +++ b/mujoco_maze/maze_task.py @@ -59,10 +59,14 @@ class MazeTask(ABC): PENALTY: Optional[float] = None MAZE_SIZE_SCALING: Scaling = Scaling(8.0, 4.0, 4.0) INNER_REWARD_SCALING: float = 0.01 - TOP_DOWN_VIEW: bool = False + # For Fall/Push/BlockMaze OBSERVE_BLOCKS: bool = False + # For Billiard OBSERVE_BALLS: bool = False + OBJECT_BALL_SIZE: float = 1.0 + # Unused now PUT_SPIN_NEAR_AGENT: bool = False + TOP_DOWN_VIEW: bool = False def __init__(self, scale: float) -> None: self.goals = [] @@ -143,7 +147,7 @@ class DistRewardSimpleRoom(GoalRewardSimpleRoom, DistRewardMixIn): class GoalRewardPush(GoalRewardUMaze): - TOP_DOWN_VIEW = True + OBSERVE_BLOCKS: bool = True def __init__(self, scale: float) -> None: super().__init__(scale) @@ -166,7 +170,7 @@ class DistRewardPush(GoalRewardPush, DistRewardMixIn): class GoalRewardFall(GoalRewardUMaze): - TOP_DOWN_VIEW = True + OBSERVE_BLOCKS: bool = True def __init__(self, scale: float) -> None: super().__init__(scale) @@ -336,10 +340,12 @@ class GoalRewardBilliard(MazeTask): MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0) OBSERVE_BALLS: bool = True - def __init__(self, scale: float, goal: Tuple[float, float] = (1.0, -2.0)) -> None: + def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None: super().__init__(scale) goal = np.array(goal) * scale - self.goals = [MazeGoal(goal, threshold=1.25, custom_size=0.25)] + goal_size = 0.3 + threshold = goal_size + self.OBJECT_BALL_SIZE + self.goals = [MazeGoal(goal, threshold=threshold, custom_size=goal_size)] def reward(self, obs: np.ndarray) -> float: return 1.0 if self.termination(obs) else self.PENALTY @@ -352,11 +358,12 @@ class GoalRewardBilliard(MazeTask): E, B = MazeCell.EMPTY, MazeCell.BLOCK R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL return [ - [B, B, B, B, B], - [B, E, E, E, B], - [B, E, M, E, B], - [B, E, R, E, B], - [B, B, B, B, B], + [B, B, B, B, B, B, B], + [B, E, E, E, E, E, B], + [B, E, E, E, E, E, B], + [B, E, E, M, E, E, B], + [B, E, E, R, E, E, B], + [B, B, B, B, B, B, B], ]