This commit is contained in:
kngwyu 2020-09-26 17:54:12 +09:00
parent d2c661d55c
commit 1c4152654b
2 changed files with 106 additions and 0 deletions

View File

@ -0,0 +1,34 @@
<!--
  Planar two-link arm. The "torso" body moves through two slide joints
  (x and y) plus a hinge about z; the "mid" body hangs off it through a
  limited hinge ("rot2") that is driven by the single motor below.
  NOTE(review): the model is still named "swimmer" although the body section
  is labelled "Reacher"; the name is left unchanged in case anything looks it
  up. Confirm before renaming.
-->
<mujoco model="swimmer">
  <!-- Angles (e.g. joint ranges) are given in degrees; inertia is derived from geoms. -->
  <compiler angle="degree" coordinate="local" inertiafromgeom="true" />
  <option collision="predefined" density="4000" integrator="RK4" timestep="0.01" viscosity="0.1" />
  <default>
    <geom conaffinity="1" condim="1" contype="1" material="geom" rgba="0.8 0.6 .4 1" />
    <joint armature="0.1" />
  </default>
  <asset>
    <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0" />
    <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01" />
    <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100" />
    <material name='MatPlane' texture="texplane" shininess="1" texrepeat="60 60" specular="1" reflectance="0.5" />
    <material name='geom' texture="texgeom" texuniform="true" />
  </asset>
  <worldbody>
    <!-- pos was "0 0s 1.3"; the stray "s" made the second component non-numeric. -->
    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" />
    <!-- Reacher -->
    <body name="torso" pos="0 0 0">
      <camera name="track" mode="trackcom" pos="0 -3 3" xyaxes="1 0 0 0 1 1" />
      <geom name="frontbody" density="1000" fromto="1.5 0 0 0.5 0 0" size="0.1" type="capsule" />
      <!-- Root degrees of freedom: translation along x and y, rotation about z. -->
      <joint axis="1 0 0" name="slider1" pos="0 0 0" type="slide" />
      <joint axis="0 1 0" name="slider2" pos="0 0 0" type="slide" />
      <joint axis="0 0 1" name="rot" pos="0 0 0" type="hinge" />
      <body name="mid" pos="0.5 0 0">
        <geom name="midbody" density="1000" fromto="0 0 0 -1 0 0" size="0.1" type="capsule" />
        <!-- The only actuated joint; limited to +/-100 degrees. -->
        <joint axis="0 0 1" limited="true" name="rot2" pos="0 0 0" range="-100 100" type="hinge" />
      </body>
    </body>
  </worldbody>
  <actuator>
    <!-- Single control input in [-1, 1], scaled by the gear onto joint "rot2". -->
    <motor ctrllimited="true" ctrlrange="-1 1" gear="150.0" joint="rot2" />
  </actuator>
</mujoco>

72
mujoco_maze/reacher.py Normal file
View File

@ -0,0 +1,72 @@
"""
Based on the reacher in `dm_control`_.

.. _dm_control: https://github.com/deepmind/dm_control
"""
from typing import Optional, Tuple

import numpy as np

from mujoco_maze.agent_model import AgentModel
from mujoco_maze.ant import ForwardRewardFn, forward_reward_vnorm
class ReacherEnv(AgentModel):
    """Agent model backed by the ``reacher.xml`` MuJoCo description.

    The reward returned by :meth:`step` is a weighted forward reward, computed
    from the displacement of the first two ``qpos`` entries over one step,
    minus a quadratic control cost.
    """

    FILE: str = "reacher.xml"
    MANUAL_COLLISION: bool = False

    def __init__(
        self,
        file_path: Optional[str] = None,
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-4,
        forward_reward_fn: ForwardRewardFn = forward_reward_vnorm,
    ) -> None:
        """Store the reward settings and initialize the base model.

        Args:
            file_path: Forwarded unchanged to ``AgentModel.__init__``.
            forward_reward_weight: Multiplier applied to the forward reward.
            ctrl_cost_weight: Multiplier applied to the squared action norm.
            forward_reward_fn: Maps the xy-velocity to a forward reward.
        """
        # The reward settings are assigned before the base initializer runs.
        # NOTE(review): presumably the base class may step the simulation
        # during construction, so keep this order; confirm in AgentModel.
        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._forward_reward_fn = forward_reward_fn
        # NOTE(review): 4 is presumably the frame skip (see ``step``); confirm.
        super().__init__(file_path, 4)

    def _forward_reward(self, xy_pos_before: np.ndarray) -> float:
        """Return the forward reward for the motion since ``xy_pos_before``.

        The velocity is the finite difference between the current first two
        ``qpos`` entries and ``xy_pos_before``, divided by ``self.dt``; the
        result of ``forward_reward_fn`` on it is returned unchanged.
        """
        xy_pos_after = self.sim.data.qpos[:2].copy()
        xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
        return self._forward_reward_fn(xy_velocity)

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
        """Advance the simulation with ``action``.

        Returns:
            A tuple ``(observation, reward, done, info)``. ``done`` is always
            ``False``; ``info`` holds the ``reward_forward`` and
            ``reward_ctrl`` terms.
        """
        # Snapshot the position first: the simulation call below changes qpos.
        xy_pos_before = self.sim.data.qpos[:2].copy()
        self.do_simulation(action, self.frame_skip)

        forward_reward = self._forward_reward(xy_pos_before)
        ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return (
            self._get_obs(),
            self._forward_reward_weight * forward_reward - ctrl_cost,
            False,
            dict(reward_forward=forward_reward, reward_ctrl=-ctrl_cost),
        )

    def _get_obs(self) -> np.ndarray:
        """Return all of ``qpos`` followed by all of ``qvel`` as one flat array."""
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()
        # Both parts are already flat copies, so no extra ravel is needed.
        return np.concatenate([position, velocity])

    def reset_model(self) -> np.ndarray:
        """Reset to the initial state plus uniform noise and return the observation.

        Every ``qpos`` and ``qvel`` entry is perturbed independently by a
        sample drawn from ``[-0.1, 0.1)``.
        """
        qpos = self.init_qpos + self.np_random.uniform(
            low=-0.1, high=0.1, size=self.model.nq,
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.1, high=0.1, size=self.model.nv,
        )
        self.set_state(qpos, qvel)
        return self._get_obs()

    def set_xy(self, xy: np.ndarray) -> None:
        """Overwrite the first two ``qpos`` entries with ``xy``; keep the rest."""
        qpos = self.sim.data.qpos.copy()
        qpos[:2] = xy
        self.set_state(qpos, self.sim.data.qvel)

    def get_xy(self) -> np.ndarray:
        """Return a copy of the first two ``qpos`` entries."""
        return np.copy(self.sim.data.qpos[:2])