Reacher
This commit is contained in:
parent
d2c661d55c
commit
1c4152654b
34
mujoco_maze/assets/reacher.xml
Normal file
34
mujoco_maze/assets/reacher.xml
Normal file
@ -0,0 +1,34 @@
|
||||
<mujoco model="reacher">
|
||||
<compiler angle="degree" coordinate="local" inertiafromgeom="true" />
|
||||
<option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1" />
|
||||
<default>
|
||||
<geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1" />
|
||||
<joint armature="0.1" />
|
||||
</default>
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
|
||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
|
||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
|
||||
<material name='geom' texture="texgeom" texuniform="true" />
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
|
||||
<geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" />
|
||||
<!-- Reacher -->
|
||||
<body name="torso" pos="0 0 0">
|
||||
<camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1" />
|
||||
<geom name="frontbody" density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule" />
|
||||
<joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide" />
|
||||
<joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide" />
|
||||
<joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge" />
|
||||
<body name="mid" pos="0.5 0 0">
|
||||
<geom name="midbody" density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule" />
|
||||
<joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge" />
|
||||
</body>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2" />
|
||||
</actuator>
|
||||
</mujoco>
|
72
mujoco_maze/reacher.py
Normal file
72
mujoco_maze/reacher.py
Normal file
@ -0,0 +1,72 @@
|
||||
"""
|
||||
Based on the reacher in `dm_control`_.
|
||||
|
||||
.. _dm_control: https://github.com/deepmind/dm_control
|
||||
"""
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from mujoco_maze.agent_model import AgentModel
|
||||
from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
|
||||
|
||||
|
||||
class ReacherEnv(AgentModel):
    """Agent model loaded from ``reacher.xml``.

    The first two entries of ``qpos`` are treated as the agent's x/y
    position.  Each step is rewarded with ``forward_reward_fn`` applied to
    the x/y velocity (scaled by ``forward_reward_weight``) minus a quadratic
    control cost (scaled by ``ctrl_cost_weight``).
    """

    FILE: str = "reacher.xml"
    MANUAL_COLLISION: bool = False

    def __init__(
        self,
        file_path: Optional[str] = None,
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-4,
        forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
    ) -> None:
        """Store the reward settings and initialise the base model.

        Args:
            file_path: Model file path, forwarded unchanged to
                ``AgentModel.__init__`` (how ``None`` is handled is up to
                the base class).
            forward_reward_weight: Multiplier for the forward reward term.
            ctrl_cost_weight: Multiplier for the squared-action penalty.
            forward_reward_fn: Callable mapping the x/y velocity to the
                forward reward.
        """
        # These must be set before the base initialiser runs.
        # NOTE(review): presumably because it may already call ``step``
        # -- confirm against AgentModel.
        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._forward_reward_fn = forward_reward_fn
        # Second positional argument is presumably ``frame_skip``
        # (``step`` reads ``self.frame_skip``) -- confirm against AgentModel.
        super().__init__(file_path, 4)

    def _forward_reward(self, xy_pos_before: np.ndarray) -> float:
        """Return the forward reward for the move away from ``xy_pos_before``.

        The x/y velocity is the finite difference between the current
        ``qpos[:2]`` and ``xy_pos_before`` over ``self.dt``; the result is
        whatever ``forward_reward_fn`` returns for that velocity.
        """
        xy_pos_after = self.sim.data.qpos[:2].copy()
        xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
        return self._forward_reward_fn(xy_velocity)

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
        """Advance the simulation by one environment step.

        Args:
            action: Control vector handed to ``do_simulation``.

        Returns:
            ``(observation, reward, done, info)``.  ``done`` is always
            ``False``; ``info`` carries the unweighted ``reward_forward``
            and the (negative) ``reward_ctrl`` terms.
        """
        xy_pos_before = self.sim.data.qpos[:2].copy()
        self.do_simulation(action, self.frame_skip)

        forward_reward = self._forward_reward(xy_pos_before)
        ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return (
            self._get_obs(),
            self._forward_reward_weight * forward_reward - ctrl_cost,
            False,
            dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
        )

    def _get_obs(self) -> np.ndarray:
        """Return the flattened ``qpos`` followed by the flattened ``qvel``."""
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()
        # Both pieces are already 1-D, so the concatenation needs no ravel.
        return np.concatenate([position, velocity])

    def reset_model(self) -> np.ndarray:
        """Reset to the initial state plus uniform noise in [-0.1, 0.1].

        Noise is drawn independently for every ``qpos`` and ``qvel`` entry.

        Returns:
            The observation of the freshly reset state.
        """
        qpos = self.init_qpos + self.np_random.uniform(
            low=-0.1, high=0.1, size=self.model.nq,
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.1, high=0.1, size=self.model.nv,
        )

        self.set_state(qpos, qvel)
        return self._get_obs()

    def set_xy(self, xy: np.ndarray) -> None:
        """Overwrite the x/y position (``qpos[:2]``), keeping the rest of the state."""
        qpos = self.sim.data.qpos.copy()
        qpos[:2] = xy
        self.set_state(qpos, self.sim.data.qvel)

    def get_xy(self) -> np.ndarray:
        """Return a copy of the current x/y position (``qpos[:2]``)."""
        return np.copy(self.sim.data.qpos[:2])
|
Loading…
Reference in New Issue
Block a user