diff --git a/mujoco_maze/agent_model.py b/mujoco_maze/agent_model.py
index 9d50fe4..c749639 100644
--- a/mujoco_maze/agent_model.py
+++ b/mujoco_maze/agent_model.py
@@ -11,7 +11,7 @@ from gym.utils import EzPickle
 class AgentModel(ABC, MujocoEnv, EzPickle):
     FILE: str
     MANUAL_COLLISION: bool
-    ORI_IND: int
+    ORI_IND: Optional[int] = None
     RADIUS: Optional[float] = None

     def __init__(self, file_path: str, frame_skip: int) -> None:
diff --git a/mujoco_maze/maze_task.py b/mujoco_maze/maze_task.py
index 828e140..09666c3 100644
--- a/mujoco_maze/maze_task.py
+++ b/mujoco_maze/maze_task.py
@@ -331,7 +331,7 @@ class SubGoalTRoom(GoalRewardTRoom):
 class NoRewardRoom(MazeTask):
     REWARD_THRESHOLD: float = 0.0
-    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 4.0)
+    MAZE_SIZE_SCALING: Scaling = Scaling(4.0, 4.0, 1.0)

     def reward(self, obs: np.ndarray) -> float:
         return 0.0
diff --git a/mujoco_maze/swimmer.py b/mujoco_maze/swimmer.py
index 7d2c6de..efe225a 100644
--- a/mujoco_maze/swimmer.py
+++ b/mujoco_maze/swimmer.py
@@ -37,7 +37,6 @@ class SwimmerEnv(AgentModel):
     def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, dict]:
         xy_pos_before = self.sim.data.qpos[:2].copy()
         self.do_simulation(action, self.frame_skip)
-        forward_reward = self._forward_reward(xy_pos_before)
         ctrl_cost = self._ctrl_cost_weight * np.sum(np.square(action))
         return (
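Note on the ORI_IND change (illustrative sketch, not part of the diff): annotating the
attribute as Optional[int] with a None default lets agent subclasses that have no
orientation joint simply leave it unset, while consumers check for None before indexing
qpos. The classes and the read_orientation helper below are hypothetical names used only
to show that pattern; they are not taken from mujoco_maze.

    from typing import Optional

    import numpy as np


    class AgentModelSketch:
        """Hypothetical stand-in for AgentModel, showing the optional attribute."""

        ORI_IND: Optional[int] = None  # subclasses may omit this; None means "no orientation index"
        RADIUS: Optional[float] = None


    class SwimmerLike(AgentModelSketch):
        """Agent without an orientation index: inherits the None default."""


    class AntLike(AgentModelSketch):
        """Agent that does expose an orientation index."""

        ORI_IND: Optional[int] = 3


    def read_orientation(agent: AgentModelSketch, qpos: np.ndarray) -> Optional[float]:
        # Hypothetical consumer: only index qpos when the agent defines ORI_IND.
        if agent.ORI_IND is None:
            return None
        return float(qpos[agent.ORI_IND])


    if __name__ == "__main__":
        qpos = np.arange(8.0)
        print(read_orientation(SwimmerLike(), qpos))  # None
        print(read_orientation(AntLike(), qpos))      # 3.0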