diff --git a/mujoco_maze/ant.py b/mujoco_maze/ant.py
index d7a9bc3..d9dacdd 100644
--- a/mujoco_maze/ant.py
+++ b/mujoco_maze/ant.py
@@ -98,7 +98,7 @@ class AntEnv(AgentModel):
low=-0.1,
high=0.1,
)
- qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
+ qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
# Set everything other than ant to original position and 0 velocity.
qpos[15:] = self.init_qpos[15:]
diff --git a/mujoco_maze/assets/swimmer.xml b/mujoco_maze/assets/swimmer.xml
index 3c6c21a..d38be58 100644
--- a/mujoco_maze/assets/swimmer.xml
+++ b/mujoco_maze/assets/swimmer.xml
@@ -13,7 +13,7 @@
-
+
diff --git a/mujoco_maze/swimmer.py b/mujoco_maze/swimmer.py
index 4ac7699..30938a4 100644
--- a/mujoco_maze/swimmer.py
+++ b/mujoco_maze/swimmer.py
@@ -24,7 +24,7 @@ class SwimmerEnv(AgentModel):
"rgb_array",
"depth_array",
],
- "render_fps": 10
+ "render_fps": 25
}
def __init__(
@@ -41,12 +41,12 @@ class SwimmerEnv(AgentModel):
super().__init__(file_path, 4, self.observation_space)
def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]:
- xy_pos_after = self.sim.data.qpos[:2].copy()
+ xy_pos_after = self.data.qpos[:2].copy()
xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
return self._forward_reward_fn(xy_velocity)
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
- xy_pos_before = self.sim.data.qpos[:2].copy()
+ xy_pos_before = self.data.qpos[:2].copy()
self.do_simulation(action, self.frame_skip)
forward_reward = self._forward_reward(xy_pos_before)
ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@@ -58,8 +58,8 @@ class SwimmerEnv(AgentModel):
)
def _get_obs(self) -> np.ndarray:
- position = self.sim.data.qpos.flat.copy()
- velocity = self.sim.data.qvel.flat.copy()
+ position = self.data.qpos.flat.copy()
+ velocity = self.data.qvel.flat.copy()
observation = np.concatenate([position, velocity]).ravel()
return observation
@@ -79,9 +79,9 @@ class SwimmerEnv(AgentModel):
return self._get_obs()
def set_xy(self, xy: np.ndarray) -> None:
- qpos = self.sim.data.qpos.copy()
+ qpos = self.data.qpos.copy()
qpos[:2] = xy
- self.set_state(qpos, self.sim.data.qvel)
+ self.set_state(qpos, self.data.qvel)
def get_xy(self) -> np.ndarray:
- return np.copy(self.sim.data.qpos[:2])
+ return np.copy(self.data.qpos[:2])