commit aa63f374fe (parent 807cd0ec4d)
@@ -13,6 +13,7 @@ from mujoco_maze.maze_task import TaskRegistry
 from mujoco_maze.point import PointEnv
 from mujoco_maze.reacher import ReacherEnv
 from mujoco_maze.swimmer import SwimmerEnv
+from mujoco_maze.pos_point import PosPointEnv
 
 for maze_id in TaskRegistry.keys():
     for i, task_cls in enumerate(TaskRegistry.tasks(maze_id)):
@@ -31,6 +32,19 @@ for maze_id in TaskRegistry.keys():
                 max_episode_steps=1000,
                 reward_threshold=task_cls.REWARD_THRESHOLD,
             )
+            # PosPoint
+            gym.envs.register(
+                id=f"PosPoint{maze_id}-v{i}",
+                entry_point="mujoco_maze.maze_env:MazeEnv",
+                kwargs=dict(
+                    model_cls=PosPointEnv,
+                    maze_task=task_cls,
+                    maze_size_scaling=point_scale,
+                    inner_reward_scaling=task_cls.INNER_REWARD_SCALING,
+                ),
+                max_episode_steps=1000,
+                reward_threshold=task_cls.REWARD_THRESHOLD,
+            )
 
         ant_scale = task_cls.MAZE_SIZE_SCALING.ant
         if ant_scale is not None:
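
Note: with the registration above, the PosPoint variants should be creatable the same way as the existing Point environments. A minimal usage sketch (the concrete id is an assumption; actual ids follow the f"PosPoint{maze_id}-v{i}" pattern, with maze_id taken from TaskRegistry):

    import gym
    import mujoco_maze  # noqa: F401 -- importing the package runs the gym.envs.register() calls above

    env = gym.make("PosPointUMaze-v0")            # assumed id; use any registered maze_id
    obs = env.reset()
    action = env.action_space.sample()            # normalized target in [-1, 1]^2
    obs, reward, done, info = env.step(action)    # old-style 4-tuple step API, as used by MazeEnv
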
@@ -57,6 +57,7 @@ class AntEnv(AgentModel):
         forward_reward_weight: float = 1.0,
         ctrl_cost_weight: float = 1e-4,
         forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
+        maze_env=None,
     ) -> None:
         self._forward_reward_weight = forward_reward_weight
         self._ctrl_cost_weight = ctrl_cost_weight
@@ -14,7 +14,7 @@
   </asset>
   <worldbody>
     <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3" dir="-0 0 -1.3" />
-    <geom name="floor" material="MatPlane" pos="0 0 0" size="40 40 40" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
+    <geom name="floor" material="MatPlane" pos="0 0 0" size="60 60 60" type="plane" conaffinity="1" rgba="0.8 0.9 0.8 1" condim="3" />
     <!-- ================= Point ================= /-->
     <!-- Note that the solimp is modified from rllab to prevent the point from going through the wall /-->
     <body name="torso" pos="0 0 0">
@@ -212,10 +212,13 @@ class MazeEnv(gym.Env):
                 rgba=goal.rgb.rgba_str(),
             )
 
+        # Add the target site for visualization
+        self._add_target_site(worldbody)
+
         _, file_path = tempfile.mkstemp(text=True, suffix=".xml")
         tree.write(file_path)
         self.world_tree = tree
-        self.wrapped_env = model_cls(file_path=file_path, **kwargs)
+        self.wrapped_env = model_cls(file_path=file_path, maze_env=self, **kwargs)
         self.observation_space = self._get_obs_space()
         self._websock_port = websock_port
         self._camera_move_x = camera_move_x
@@ -225,6 +228,18 @@ class MazeEnv(gym.Env):
         self._mj_offscreen_viewer = None
         self._websock_server_pipe = None
 
+    def _add_target_site(self, worldbody):
+        """Add visualization for the target position if the agent has one."""
+        target_size = self._maze_size_scaling * 0.1
+        ET.SubElement(
+            worldbody,
+            "site",
+            name="target_site",
+            pos="2 0 0",  # Initial position will be updated during the simulation
+            size=f"{target_size} {target_size} {target_size}",
+            rgba="0 0 1 0.5",  # Blue and semi-transparent
+        )
+
     @property
     def has_extended_obs(self) -> bool:
         return self._top_down_view or self._observe_blocks or self._observe_balls
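
Note: PosPointEnv.step() (in the new file below) moves this site by writing data.site_xpos[-1], i.e. it assumes "target_site" ends up as the last site in the compiled model. A sketch of a name-based lookup that avoids that ordering assumption (not part of this commit; it only uses the mujoco bindings pos_point.py already imports):

    import mujoco

    def target_site_xpos(model, data):
        # Resolve the site added by _add_target_site by name instead of by position.
        site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, "target_site")
        return data.site_xpos[site_id]
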
@@ -473,6 +488,7 @@ class MazeEnv(gym.Env):
                 self.wrapped_env.data.xipos[idx][:2] = pos
         else:
             inner_next_obs, inner_reward, _, info = self.wrapped_env.step(action)
+
         next_obs = self._get_obs()
         inner_reward = self._inner_reward_scaling * inner_reward
         outer_reward = self._task.reward(next_obs)
@@ -33,7 +33,7 @@ class PointEnv(AgentModel):
 
     VELOCITY_LIMITS: float = 10.0
 
-    def __init__(self, file_path: Optional[str] = None) -> None:
+    def __init__(self, file_path: Optional[str] = None, maze_env=None) -> None:
         high = np.inf * np.ones(6, dtype=np.float32)
         high[3:] = self.VELOCITY_LIMITS * 1.2
         high[self.ORI_IND] = np.pi
mujoco_maze/pos_point.py (new file, 121 lines)
@@ -0,0 +1,121 @@
+from typing import Optional, Tuple
+import gym
+import mujoco
+import numpy as np
+import xml.etree.ElementTree as ET
+import os
+import tempfile
+from mujoco_maze.agent_model import AgentModel
+
+class PosPointEnv(AgentModel):
+    metadata = {
+        "render_modes": [
+            "human",
+            "rgb_array",
+            "depth_array",
+        ],
+        "render_fps": 50,
+    }
+
+    FILE: str = "point.xml"
+    MANUAL_COLLISION: bool = True
+    RADIUS: float = 0.4
+    OBJBALL_TYPE: str = "hinge"
+    VELOCITY_LIMITS: float = 0.333
+    ACCELERATION_LIMITS: float = 0.1
+
+    def __init__(self, file_path: Optional[str] = None, maze_env=None) -> None:
+        self.maze_env = maze_env
+        high = np.inf * np.ones(6, dtype=np.float32)  # [target_x, target_y, current_x, current_y, velocity_x, velocity_y]
+        low = -high
+        observation_space = gym.spaces.Box(low, high)
+        action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)  # Normalized [-1, 1] action space
+        self.target_pos = np.zeros(2)
+        self.action_penalty = 0.1
+        super().__init__(file_path, 1, observation_space)
+        self.action_space = action_space
+
+    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
+        # Calculate the maze size based on structure and scaling
+        maze_size = self._calculate_maze_size()
+
+        # Scale the action to the maze size
+        self.target_pos = (action + 0.5) / 1.9 * maze_size
+
+        # Get current position and velocity
+        current_pos = self.get_xy()
+        current_vel = self.data.qvel[:2]
+
+        # Compute desired velocity towards the target position
+        direction = self.target_pos - current_pos
+        distance = np.linalg.norm(direction)
+        if distance > 0:
+            direction /= distance  # Normalize the direction
+
+        desired_velocity = direction * min(distance, self.VELOCITY_LIMITS)
+
+        # Compute acceleration required to reach the desired velocity
+        acceleration = desired_velocity - current_vel
+        acceleration = np.clip(acceleration, -self.ACCELERATION_LIMITS, self.ACCELERATION_LIMITS)
+
+        # Update velocity and position
+        new_vel = current_vel + acceleration
+        new_vel = np.clip(new_vel, -self.VELOCITY_LIMITS, self.VELOCITY_LIMITS)
+
+        qpos = self.data.qpos.copy()
+        qpos[:2] += new_vel  # Update position with new velocity
+
+        # Calculate the orientation to face the target
+        angle_to_target = np.arctan2(direction[1], direction[0])
+        qpos[2] = angle_to_target  # Set the orientation to face the target
+
+        qpos = np.concatenate([qpos[:3], self.data.qpos[3:]])  # Ensure qpos has the correct shape
+        qvel = np.concatenate([new_vel, self.data.qvel[2:]])  # Ensure qvel has the correct shape
+        self.set_state(qpos, qvel)
+
+        for _ in range(0, self.frame_skip):
+            mujoco.mj_step(self.model, self.data)
+
+        # Update the position of the target site
+        self.data.site_xpos[-1][:2] = self.target_pos  # Assuming 'target_site' is the last site added
+        self.data.site_xpos[-1][2] = 0.1  # Slightly higher in space
+
+        next_obs = self._get_obs()
+        reward = -1 * min(max(0, distance - 3), 100) * self.action_penalty
+        return next_obs, reward, False, {}
+
+    def _calculate_maze_size(self) -> float:
+        """Calculate the effective size of the maze."""
+        maze_structure = self.maze_env._maze_structure
+        num_rows = len(maze_structure)
+        num_cols = len(maze_structure[0]) if num_rows > 0 else 0
+        maze_size_scaling = self.maze_env._maze_size_scaling
+        maze_size = max(num_rows, num_cols) * maze_size_scaling
+        return maze_size
+
+    def _get_obs(self):
+        return np.concatenate(
+            [
+                self.data.qpos.flat[:2],  # current position
+                self.data.qvel.flat[:2],  # current velocity
+                self.target_pos,  # target position
+            ]
+        )
+
+    def reset_model(self):
+        qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.1, high=0.1)
+        qvel = self.init_qvel + self.np_random.random(self.model.nv) * 0.1
+
+        # Set everything other than point to original position and 0 velocity.
+        qpos[2:] = self.init_qpos[2:]
+        qvel[2:] = 0.0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def get_xy(self):
+        return self.data.qpos[:2].copy()
+
+    def set_xy(self, xy: np.ndarray) -> None:
+        qpos = self.data.qpos.copy()
+        qpos[:2] = xy
+        self.set_state(qpos, self.data.qvel)
@@ -31,6 +31,7 @@ class ReacherEnv(AgentModel):
         forward_reward_weight: float = 1.0,
         ctrl_cost_weight: float = 1e-4,
         forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
+        maze_env=None,
     ) -> None:
         self._forward_reward_weight = forward_reward_weight
         self._ctrl_cost_weight = ctrl_cost_weight
@@ -33,6 +33,7 @@ class SwimmerEnv(AgentModel):
         forward_reward_weight: float = 1.0,
         ctrl_cost_weight: float = 1e-4,
         forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
+        maze_env=None,
     ) -> None:
         self._forward_reward_weight = forward_reward_weight
         self._ctrl_cost_weight = ctrl_cost_weight