Add Billiard-v2
This commit is contained in:
		
							parent
							
								
									ac8755b4c1
								
							
						
					
					
						commit
						534e93c3d3
					
				@ -167,7 +167,7 @@ class MazeEnv(gym.Env):
 | 
				
			|||||||
                name=f"goal_site{i}",
 | 
					                name=f"goal_site{i}",
 | 
				
			||||||
                pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
 | 
					                pos=f"{goal.pos[0]} {goal.pos[1]} {z}",
 | 
				
			||||||
                size=f"{maze_size_scaling * 0.1}",
 | 
					                size=f"{maze_size_scaling * 0.1}",
 | 
				
			||||||
                rgba=goal.rbga_str(),
 | 
					                rgba=goal.rgb.rgba_str(),
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
 | 
					        _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
 | 
				
			||||||
@ -420,7 +420,7 @@ def _add_object_ball(
 | 
				
			|||||||
        name=f"objball_{i}_{j}_geom",
 | 
					        name=f"objball_{i}_{j}_geom",
 | 
				
			||||||
        size=f"{size}",  # Radius
 | 
					        size=f"{size}",  # Radius
 | 
				
			||||||
        pos=f"0.0 0.0 {size}",  # Z = size so that this ball can move!!
 | 
					        pos=f"0.0 0.0 {size}",  # Z = size so that this ball can move!!
 | 
				
			||||||
        rgba="0.1 0.1 0.7 1",
 | 
					        rgba=maze_task.BLUE.rgba_str(),
 | 
				
			||||||
        contype="1",
 | 
					        contype="1",
 | 
				
			||||||
        conaffinity="1",
 | 
					        conaffinity="1",
 | 
				
			||||||
        solimp="0.9 0.99 0.001",
 | 
					        solimp="0.9 0.99 0.001",
 | 
				
			||||||
 | 
				
			|||||||
@ -14,6 +14,9 @@ class Rgb(NamedTuple):
 | 
				
			|||||||
    green: float
 | 
					    green: float
 | 
				
			||||||
    blue: float
 | 
					    blue: float
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def rgba_str(self) -> str:
 | 
				
			||||||
 | 
					        return f"{self.red} {self.green} {self.blue} 1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RED = Rgb(0.7, 0.1, 0.1)
 | 
					RED = Rgb(0.7, 0.1, 0.1)
 | 
				
			||||||
GREEN = Rgb(0.1, 0.7, 0.1)
 | 
					GREEN = Rgb(0.1, 0.7, 0.1)
 | 
				
			||||||
@ -37,10 +40,6 @@ class MazeGoal:
 | 
				
			|||||||
        self.threshold = threshold
 | 
					        self.threshold = threshold
 | 
				
			||||||
        self.custom_size = custom_size
 | 
					        self.custom_size = custom_size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def rbga_str(self) -> str:
 | 
					 | 
				
			||||||
        r, g, b = self.rgb
 | 
					 | 
				
			||||||
        return f"{r} {g} {b} 1"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def neighbor(self, obs: np.ndarray) -> float:
 | 
					    def neighbor(self, obs: np.ndarray) -> float:
 | 
				
			||||||
        return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
 | 
					        return np.linalg.norm(obs[: self.dim] - self.pos) <= self.threshold
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -339,19 +338,31 @@ class GoalRewardBilliard(MazeTask):
 | 
				
			|||||||
    PENALTY: float = -0.0001
 | 
					    PENALTY: float = -0.0001
 | 
				
			||||||
    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
 | 
					    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 3.0, 3.0)
 | 
				
			||||||
    OBSERVE_BALLS: bool = True
 | 
					    OBSERVE_BALLS: bool = True
 | 
				
			||||||
 | 
					    GOAL_SIZE: float = 0.3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
 | 
					    def __init__(self, scale: float, goal: Tuple[float, float] = (2.0, -3.0)) -> None:
 | 
				
			||||||
        super().__init__(scale)
 | 
					        super().__init__(scale)
 | 
				
			||||||
        goal = np.array(goal) * scale
 | 
					        goal = np.array(goal) * scale
 | 
				
			||||||
        goal_size = 0.3
 | 
					        self.goals.append(
 | 
				
			||||||
        threshold = goal_size + self.OBJECT_BALL_SIZE
 | 
					            MazeGoal(goal, threshold=self._threshold(), custom_size=self.GOAL_SIZE)
 | 
				
			||||||
        self.goals = [MazeGoal(goal, threshold=threshold, custom_size=goal_size)]
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _threshold(self) -> float:
 | 
				
			||||||
 | 
					        return self.OBJECT_BALL_SIZE + self.GOAL_SIZE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def reward(self, obs: np.ndarray) -> float:
 | 
					    def reward(self, obs: np.ndarray) -> float:
 | 
				
			||||||
        return 1.0 if self.termination(obs) else self.PENALTY
 | 
					        object_pos = obs[3:6]
 | 
				
			||||||
 | 
					        for goal in self.goals:
 | 
				
			||||||
 | 
					            if goal.neighbor(object_pos):
 | 
				
			||||||
 | 
					                return goal.reward_scale
 | 
				
			||||||
 | 
					        return self.PENALTY
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def termination(self, obs: np.ndarray) -> bool:
 | 
					    def termination(self, obs: np.ndarray) -> bool:
 | 
				
			||||||
        return super().termination(obs[3:6])
 | 
					        object_pos = obs[3:6]
 | 
				
			||||||
 | 
					        for goal in self.goals:
 | 
				
			||||||
 | 
					            if goal.neighbor(object_pos):
 | 
				
			||||||
 | 
					                return True
 | 
				
			||||||
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def create_maze() -> List[List[MazeCell]]:
 | 
					    def create_maze() -> List[List[MazeCell]]:
 | 
				
			||||||
@ -372,6 +383,38 @@ class DistRewardBilliard(GoalRewardBilliard):
 | 
				
			|||||||
        return -self.goals[0].euc_dist(obs[3:6]) / self.scale
 | 
					        return -self.goals[0].euc_dist(obs[3:6]) / self.scale
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class SubGoalBilliard(GoalRewardBilliard):
 | 
				
			||||||
 | 
					    def __init__(
 | 
				
			||||||
 | 
					        self,
 | 
				
			||||||
 | 
					        scale: float,
 | 
				
			||||||
 | 
					        primary_goal: Tuple[float, float] = (2.0, -3.0),
 | 
				
			||||||
 | 
					        subgoal: Tuple[float, float] = (-2.0, -3.0),
 | 
				
			||||||
 | 
					    ) -> None:
 | 
				
			||||||
 | 
					        super().__init__(scale, primary_goal)
 | 
				
			||||||
 | 
					        self.goals.append(
 | 
				
			||||||
 | 
					            MazeGoal(
 | 
				
			||||||
 | 
					                np.array(subgoal) * scale,
 | 
				
			||||||
 | 
					                reward_scale=0.5,
 | 
				
			||||||
 | 
					                rgb=GREEN,
 | 
				
			||||||
 | 
					                threshold=self._threshold(),
 | 
				
			||||||
 | 
					                custom_size=self.GOAL_SIZE,
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def create_maze() -> List[List[MazeCell]]:
 | 
				
			||||||
 | 
					        E, B = MazeCell.EMPTY, MazeCell.BLOCK
 | 
				
			||||||
 | 
					        R, M = MazeCell.ROBOT, MazeCell.OBJECT_BALL
 | 
				
			||||||
 | 
					        return [
 | 
				
			||||||
 | 
					            [B, B, B, B, B, B, B],
 | 
				
			||||||
 | 
					            [B, E, E, E, E, E, B],
 | 
				
			||||||
 | 
					            [B, E, E, E, B, B, B],
 | 
				
			||||||
 | 
					            [B, E, E, M, E, E, B],
 | 
				
			||||||
 | 
					            [B, E, E, R, E, E, B],
 | 
				
			||||||
 | 
					            [B, B, B, B, B, B, B],
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TaskRegistry:
 | 
					class TaskRegistry:
 | 
				
			||||||
    REGISTRY: Dict[str, List[Type[MazeTask]]] = {
 | 
					    REGISTRY: Dict[str, List[Type[MazeTask]]] = {
 | 
				
			||||||
        "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
 | 
					        "SimpleRoom": [DistRewardSimpleRoom, GoalRewardSimpleRoom],
 | 
				
			||||||
@ -382,7 +425,7 @@ class TaskRegistry:
 | 
				
			|||||||
        "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
 | 
					        "4Rooms": [DistReward4Rooms, GoalReward4Rooms, SubGoal4Rooms],
 | 
				
			||||||
        "TRoom": [DistRewardTRoom, GoalRewardTRoom],
 | 
					        "TRoom": [DistRewardTRoom, GoalRewardTRoom],
 | 
				
			||||||
        "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
 | 
					        "BlockMaze": [DistRewardBlockMaze, GoalRewardBlockMaze],
 | 
				
			||||||
        "Billiard": [DistRewardBilliard, GoalRewardBilliard],
 | 
					        "Billiard": [DistRewardBilliard, GoalRewardBilliard, SubGoalBilliard],
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
 | 
				
			|||||||
@ -36,6 +36,20 @@ def test_point_maze(maze_id):
 | 
				
			|||||||
            assert r < 0.0
 | 
					            assert r < 0.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@pytest.mark.parametrize("maze_id", ["2Rooms", "4Rooms", "Billiard"])
 | 
				
			||||||
 | 
					def test_subgoal_envs(maze_id):
 | 
				
			||||||
 | 
					    env = gym.make(f"Point{maze_id}-v2")
 | 
				
			||||||
 | 
					    s0 = env.reset()
 | 
				
			||||||
 | 
					    s, r, _, _ = env.step(env.action_space.sample())
 | 
				
			||||||
 | 
					    if not env.unwrapped.has_extended_obs:
 | 
				
			||||||
 | 
					        assert s0.shape == (7,)
 | 
				
			||||||
 | 
					        assert s.shape == (7,)
 | 
				
			||||||
 | 
					    elif env.unwrapped._observe_balls:
 | 
				
			||||||
 | 
					        assert s0.shape == (10,)
 | 
				
			||||||
 | 
					        assert s.shape == (10,)
 | 
				
			||||||
 | 
					    assert len(env.unwrapped._task.goals) > 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
 | 
					@pytest.mark.parametrize("maze_id", mujoco_maze.TaskRegistry.keys())
 | 
				
			||||||
def test_reacher_maze(maze_id):
 | 
					def test_reacher_maze(maze_id):
 | 
				
			||||||
    for inhibited in ["Fall", "Push", "Block", "Billiard"]:
 | 
					    for inhibited in ["Fall", "Push", "Block", "Billiard"]:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user