diff --git a/mujoco_maze/__init__.py b/mujoco_maze/__init__.py index 3309bc9..d5324dd 100644 --- a/mujoco_maze/__init__.py +++ b/mujoco_maze/__init__.py @@ -13,6 +13,7 @@ from mujoco_maze.maze_task import TaskRegistry from mujoco_maze.point import PointEnv from mujoco_maze.reacher import ReacherEnv from mujoco_maze.swimmer import SwimmerEnv +from mujoco_maze.pos_point import PosPointEnv for maze_id in TaskRegistry.keys(): for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)): @@ -31,6 +32,19 @@ for maze_id in TaskRegistry.keys(): max_episode_steps=1000, reward_threshold=task_cls.REWARD_THRESHOLD, ) + # PosPoint + gym.envs.register( + id=f"PosPoint{maze_id}-v{i}", + entry_point="mujoco_maze.maze_env:MazeEnv", + kwargs=dict( + model_cls=PosPointEnv, + maze_task=task_cls, + maze_size_scaling=point_scale, + inner_reward_scaling=task_cls.INNER_REWARD_SCALING, + ), + max_episode_steps=1000, + reward_threshold=task_cls.REWARD_THRESHOLD, + ) ant_scale = task_cls.MAZE_SIZE_SCALING.ant if ant_scale is not None: diff --git a/mujoco_maze/ant.py b/mujoco_maze/ant.py index bb8448f..b5c2026 100644 --- a/mujoco_maze/ant.py +++ b/mujoco_maze/ant.py @@ -57,6 +57,7 @@ class AntEnv(AgentModel): forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-4, forward_reward_fn: ForwardRewardFn = forward_reward_vnorm, + maze_env=None, ) -> None: self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight diff --git a/mujoco_maze/assets/point.xml b/mujoco_maze/assets/point.xml index 4c06cb1..f7dc3d7 100644 --- a/mujoco_maze/assets/point.xml +++ b/mujoco_maze/assets/point.xml @@ -14,7 +14,7 @@ - + diff --git a/mujoco_maze/maze_env.py b/mujoco_maze/maze_env.py index d1725bd..c56d3ad 100644 --- a/mujoco_maze/maze_env.py +++ b/mujoco_maze/maze_env.py @@ -212,10 +212,13 @@ class MazeEnv(gym.Env): rgba=goal.rgb.rgba_str(), ) + # Add the target site for visualization + self._add_target_site(worldbody) + _, file_path = tempfile.mkstemp(text=True, suffix=".xml") tree.write(file_path) self.world_tree = tree - self.wrapped_env = model_cls(file_path=file_path, **kwargs) + self.wrapped_env = model_cls(file_path=file_path, maze_env=self, **kwargs) self.observation_space = self._get_obs_space() self._websock_port = websock_port self._camera_move_x = camera_move_x @@ -225,6 +228,18 @@ class MazeEnv(gym.Env): self._mj_offscreen_viewer = None self._websock_server_pipe = None + def _add_target_site(self, worldbody): + """Add visualization for the target position if the agent has one.""" + target_size = self._maze_size_scaling * 0.1 + ET.SubElement( + worldbody, + "site", + name="target_site", + pos="2 0 0", # Initial position will be updated during the simulation + size=f"{target_size} {target_size} {target_size}", + rgba="0 0 1 0.5", # Blue and semi-transparent + ) + @property def has_extended_obs(self) -> bool: return self._top_down_view or self._observe_blocks or self._observe_balls @@ -473,6 +488,7 @@ class MazeEnv(gym.Env): self.wrapped_env.data.xipos[idx][:2] = pos else: inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action) + next_obs = self._get_obs() inner_reward = self._inner_reward_scaling * inner_reward outer_reward = self._task.reward(next_obs) diff --git a/mujoco_maze/point.py b/mujoco_maze/point.py index 9745c1f..d026917 100644 --- a/mujoco_maze/point.py +++ b/mujoco_maze/point.py @@ -33,7 +33,7 @@ class PointEnv(AgentModel): VELOCITY_LIMITS: float = 10.0 - def __init__(self, file_path: Optional[str] = None) -> None: + def __init__(self, file_path: Optional[str] = None, maze_env=None) -> None: high = np.inf * np.ones(6, dtype=np.float32) high[3:] = self.VELOCITY_LIMITS * 1.2 high[self.ORI_IND] = np.pi diff --git a/mujoco_maze/pos_point.py b/mujoco_maze/pos_point.py new file mode 100644 index 0000000..6def29d --- /dev/null +++ b/mujoco_maze/pos_point.py @@ -0,0 +1,121 @@ +from typing import Optional, Tuple +import gym +import mujoco +import numpy as np +import xml.etree.ElementTree as ET +import os +import tempfile +from mujoco_maze.agent_model import AgentModel + +class PosPointEnv(AgentModel): + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + "render_fps": 50, + } + + FILE: str = "point.xml" + MANUAL_COLLISION: bool = True + RADIUS: float = 0.4 + OBJBALL_TYPE: str = "hinge" + VELOCITY_LIMITS: float = 0.333 + ACCELERATION_LIMITS: float = 0.1 + + def __init__(self, file_path: Optional[str] = None, maze_env=None) -> None: + self.maze_env = maze_env + high = np.inf * np.ones(6, dtype=np.float32) # [target_x, target_y, current_x, current_y, velocity_x, velocity_y] + low = -high + observation_space = gym.spaces.Box(low, high) + action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32) # Normalized [-1, 1] action space + self.target_pos = np.zeros(2) + self.action_penalty = 0.1 + super().__init__(file_path, 1, observation_space) + self.action_space = action_space + + def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: + # Calculate the maze size based on structure and scaling + maze_size = self._calculate_maze_size() + + # Scale the action to the maze size + self.target_pos = (action + 0.5)/1.9 * maze_size + + # Get current position and velocity + current_pos = self.get_xy() + current_vel = self.data.qvel[:2] + + # Compute desired velocity towards the target position + direction = self.target_pos - current_pos + distance = np.linalg.norm(direction) + if distance > 0: + direction /= distance # Normalize the direction + + desired_velocity = direction * min(distance, self.VELOCITY_LIMITS) + + # Compute acceleration required to reach the desired velocity + acceleration = desired_velocity - current_vel + acceleration = np.clip(acceleration, -self.ACCELERATION_LIMITS, self.ACCELERATION_LIMITS) + + # Update velocity and position + new_vel = current_vel + acceleration + new_vel = np.clip(new_vel, -self.VELOCITY_LIMITS, self.VELOCITY_LIMITS) + + qpos = self.data.qpos.copy() + qpos[:2] += new_vel # Update position with new velocity + + # Calculate the orientation to face the target + angle_to_target = np.arctan2(direction[1], direction[0]) + qpos[2] = angle_to_target # Set the orientation to face the target + + qpos = np.concatenate([qpos[:3], self.data.qpos[3:]]) # Ensure qpos has the correct shape + qvel = np.concatenate([new_vel, self.data.qvel[2:]]) # Ensure qvel has the correct shape + self.set_state(qpos, qvel) + + for _ in range(0, self.frame_skip): + mujoco.mj_step(self.model, self.data) + + # Update the position of the target site + self.data.site_xpos[-1][:2] = self.target_pos # Assuming 'target_site' is the first site added + self.data.site_xpos[-1][2] = 0.1 # Slightly higher in space + + next_obs = self._get_obs() + reward = -1*min(max(0, distance - 3), 100)*self.action_penalty + return next_obs, reward, False, {} + + def _calculate_maze_size(self) -> float: + """Calculate the effective size of the maze.""" + maze_structure = self.maze_env._maze_structure + num_rows = len(maze_structure) + num_cols = len(maze_structure[0]) if num_rows > 0 else 0 + maze_size_scaling = self.maze_env._maze_size_scaling + maze_size = max(num_rows, num_cols) * maze_size_scaling + return maze_size + + def _get_obs(self): + return np.concatenate( + [ + self.data.qpos.flat[:2], # current position + self.data.qvel.flat[:2], # current velocity + self.target_pos, # target position + ] + ) + + def reset_model(self): + qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.1, high=0.1) + qvel = self.init_qvel + self.np_random.random(self.model.nv) * 0.1 + + # Set everything other than point to original position and 0 velocity. + qpos[2:] = self.init_qpos[2:] + qvel[2:] = 0.0 + self.set_state(qpos, qvel) + return self._get_obs() + + def get_xy(self): + return self.data.qpos[:2].copy() + + def set_xy(self, xy: np.ndarray) -> None: + qpos = self.data.qpos.copy() + qpos[:2] = xy + self.set_state(qpos, self.data.qvel) diff --git a/mujoco_maze/reacher.py b/mujoco_maze/reacher.py index 9026e40..c80915a 100644 --- a/mujoco_maze/reacher.py +++ b/mujoco_maze/reacher.py @@ -31,6 +31,7 @@ class ReacherEnv(AgentModel): forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-4, forward_reward_fn: ForwardRewardFn = forward_reward_vnorm, + maze_env=None, ) -> None: self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight diff --git a/mujoco_maze/swimmer.py b/mujoco_maze/swimmer.py index 30938a4..9062f09 100644 --- a/mujoco_maze/swimmer.py +++ b/mujoco_maze/swimmer.py @@ -33,6 +33,7 @@ class SwimmerEnv(AgentModel): forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-4, forward_reward_fn: ForwardRewardFn = forward_reward_vnorm, + maze_env=None, ) -> None: self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight