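Use self.data instead of self.sim.data in swimmer; replace np_random.randn with np_random.random in ant; fix swimmer.xml light pos typo; set swimmer render_fps to 25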

2024-04-04 15:08:45 +02:00 · 2024-04-04 15:08:45 +02:00 · 9b3a5ccca5
commit 9b3a5ccca5
parent a19f232c03
3 changed files with 10 additions and 10 deletions
--- a/mujoco_maze/ant.py
+++ b/mujoco_maze/ant.py
@ -98,7 +98,7 @@ class AntEnv(AgentModel):
            low=-0.1,
            high=0.1,
        )
-        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * 0.1
+        qvel = self.init_qvel + self.np_random.random(self.model.nv) * 0.1

        # Set everything other than ant to original position and 0 velocity.
        qpos[15:] = self.init_qpos[15:]
--- a/mujoco_maze/assets/swimmer.xml
+++ b/mujoco_maze/assets/swimmer.xml
@ -13,7 +13,7 @@
    <material name='geom' texture="texgeom" texuniform="true" />
  </asset>
  <worldbody>
-    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0s 1.3" specular=".1 .1 .1" />
+    <light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1" />
    <geom conaffinity="1" condim="3" material="MatPlane" name="floor" pos="0 0 -0.1" rgba="0.8 0.9 0.8 1" size="40 40 0.1" type="plane" />
    <!--  ================= SWIMMER ================= /-->
    <body name="torso" pos="0 0 0">
--- a/mujoco_maze/swimmer.py
+++ b/mujoco_maze/swimmer.py
@ -24,7 +24,7 @@ class SwimmerEnv(AgentModel):
            "rgb_array",
            "depth_array",
        ],
-        "render_fps": 10
+        "render_fps": 25
    }

    def __init__(
@ -41,12 +41,12 @@ class SwimmerEnv(AgentModel):
        super().__init__(file_path, 4, self.observation_space)

    def _forward_reward(self, xy_pos_before: np.ndarray) -> Tuple[float, np.ndarray]:
-        xy_pos_after = self.sim.data.qpos[:2].copy()
+        xy_pos_after = self.data.qpos[:2].copy()
        xy_velocity = (xy_pos_after - xy_pos_before) / self.dt
        return self._forward_reward_fn(xy_velocity)

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
-        xy_pos_before = self.sim.data.qpos[:2].copy()
+        xy_pos_before = self.data.qpos[:2].copy()
        self.do_simulation(action, self.frame_skip)
        forward_reward = self._forward_reward(xy_pos_before)
        ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
@ -58,8 +58,8 @@ class SwimmerEnv(AgentModel):
        )

    def _get_obs(self) -> np.ndarray:
-        position = self.sim.data.qpos.flat.copy()
-        velocity = self.sim.data.qvel.flat.copy()
+        position = self.data.qpos.flat.copy()
+        velocity = self.data.qvel.flat.copy()
        observation = np.concatenate([position, velocity]).ravel()
        return observation

@ -79,9 +79,9 @@ class SwimmerEnv(AgentModel):
        return self._get_obs()

    def set_xy(self, xy: np.ndarray) -> None:
-        qpos = self.sim.data.qpos.copy()
+        qpos = self.data.qpos.copy()
        qpos[:2] = xy
-        self.set_state(qpos, self.sim.data.qvel)
+        self.set_state(qpos, self.data.qvel)

    def get_xy(self) -> np.ndarray:
-        return np.copy(self.sim.data.qpos[:2])
+        return np.copy(self.data.qpos[:2])