From 155807207fb6398e4beca17e9434f89195ccf926 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 28 Aug 2023 18:38:33 +0200 Subject: [PATCH] Fix: SimpleReacher and ViaPointReacher did not seed correctly --- .../envs/classic_control/simple_reacher/simple_reacher.py | 8 ++++++-- .../classic_control/viapoint_reacher/viapoint_reacher.py | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py index db7274c..3afd021 100644 --- a/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py +++ b/fancy_gym/envs/classic_control/simple_reacher/simple_reacher.py @@ -45,9 +45,13 @@ class SimpleReacherEnv(BaseReacherTorqueEnv): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: - ret = super().reset(seed=seed, options=options) + # Reset twice to ensure that we return the obs after generating the goal and that we generate the goal after executing the seeded reset. + # (Env will not behave deterministically otherwise) + # Yes, there is probably a more elegant solution to this problem... 
self._generate_goal() - return ret + super().reset(seed=seed, options=options) + self._generate_goal() + return super().reset(seed=seed, options=options) def _get_reward(self, action: np.ndarray): diff = self.end_effector - self._goal diff --git a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py index febccc7..e4d9091 100644 --- a/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py +++ b/fancy_gym/envs/classic_control/viapoint_reacher/viapoint_reacher.py @@ -44,6 +44,11 @@ class ViaPointReacherEnv(BaseReacherDirectEnv): def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \ -> Tuple[ObsType, Dict[str, Any]]: + # Reset twice to ensure that we return the obs after generating the goal and that we generate the goal after executing the seeded reset. + # (Env will not behave deterministically otherwise) + # Yes, there is probably a more elegant solution to this problem... + self._generate_goal() + super().reset(seed=seed, options=options) self._generate_goal() return super().reset(seed=seed, options=options)