diff --git a/mujoco_maze/ant.py b/mujoco_maze/ant.py index d7a9bc3..d9dacdd 100644 --- a/mujoco_maze/ant.py +++ b/mujoco_maze/ant.py @@ -98,7 +98,7 @@ class AntEnv(AgentModel): low=-0.1, high=0.1, ) - qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1 + qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1 # Set everything other than ant to original position and 0 velocity. qpos[15:] = self.init_qpos[15:] diff --git a/mujoco_maze/assets/swimmer.xml b/mujoco_maze/assets/swimmer.xml index 3c6c21a..d38be58 100644 --- a/mujoco_maze/assets/swimmer.xml +++ b/mujoco_maze/assets/swimmer.xml @@ -13,7 +13,7 @@ - + diff --git a/mujoco_maze/swimmer.py b/mujoco_maze/swimmer.py index 4ac7699..30938a4 100644 --- a/mujoco_maze/swimmer.py +++ b/mujoco_maze/swimmer.py @@ -24,7 +24,7 @@ class SwimmerEnv(AgentModel): "rgb_array", "depth_array", ], - "render_fps": 10 + "render_fps": 25 } def __init__( @@ -41,12 +41,12 @@ class SwimmerEnv(AgentModel): super().__init__(file_path, 4, self.observation_space) def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]: - xy_pos_after = self.sim.data.qpos[:2].copy() + xy_pos_after = self.data.qpos[:2].copy() xy_velocity = (xy_pos_after - xy_pos_before) / self.dt return self._forward_reward_fn(xy_velocity) def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]: - xy_pos_before = self.sim.data.qpos[:2].copy() + xy_pos_before = self.data.qpos[:2].copy() self.do_simulation(action, self.frame_skip) forward_reward = self._forward_reward(xy_pos_before) ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action)) @@ -58,8 +58,8 @@ class SwimmerEnv(AgentModel): ) def _get_obs(self) -> np.ndarray: - position = self.sim.data.qpos.flat.copy() - velocity = self.sim.data.qvel.flat.copy() + position = self.data.qpos.flat.copy() + velocity = self.data.qvel.flat.copy() observation = np.concatenate([position, velocity]).ravel() return observation @@ -79,9 +79,9 @@ class 
SwimmerEnv(AgentModel): return self._get_obs() def set_xy(self, xy: np.ndarray) -> None: - qpos = self.sim.data.qpos.copy() + qpos = self.data.qpos.copy() qpos[:2] = xy - self.set_state(qpos, self.sim.data.qvel) + self.set_state(qpos, self.data.qvel) def get_xy(self) -> np.ndarray: - return np.copy(self.sim.data.qpos[:2]) + return np.copy(self.data.qpos[:2])