Fix: Some fancy envs failed the determinism test because they sampled during reset before the random generator was seeded

This commit is contained in:
Dominik Moritz Roth 2023-08-28 18:16:16 +02:00
parent 5921e0008b
commit 07aeb779a7
6 changed files with 12 additions and 6 deletions

View File

@ -162,8 +162,9 @@ class AntJumpEnv(AntEnvCustomXML):
self.current_step = 0
self.max_height = 0
# goal heights from 1.0 to 2.5; can be increased, but didn't work well with CMORE
ret = super().reset(seed=seed, options=options)
self.goal = self.np_random.uniform(1.0, 2.5, 1)
return super().reset(seed=seed, options=options)
return ret
# reset_model had to be implemented in every env to make it deterministic
def reset_model(self):

View File

@ -127,8 +127,9 @@ class HalfCheetahJumpEnv(HalfCheetahEnvCustomXML):
-> Tuple[ObsType, Dict[str, Any]]:
self.max_height = 0
self.current_step = 0
ret = super().reset(seed=seed, options=options)
self.goal = self.np_random.uniform(1.1, 1.6, 1) # 1.1 1.6
return super().reset(seed=seed, options=options)
return ret
# overwrite reset_model to make it deterministic
def reset_model(self):

View File

@ -150,10 +150,11 @@ class HopperJumpOnBoxEnv(HopperEnvCustomXML):
self.min_distance = 5000
self.current_step = 0
self.hopper_on_box = False
ret = super().reset(seed=seed, options=options)
if self.context:
self.box_x = self.np_random.uniform(1, 3, 1)
self.model.body("box").pos = [self.box_x[0], 0, 0]
return super().reset(seed=seed, options=options)
return ret
# overwrite reset_model to make it deterministic
def reset_model(self):

View File

@ -100,8 +100,9 @@ class HopperThrowEnv(HopperEnvCustomXML):
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None) \
-> Tuple[ObsType, Dict[str, Any]]:
self.current_step = 0
ret = super().reset(seed=seed, options=options)
self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1) # 0.5 8.0
return super().reset(seed=seed, options=options)
return ret
# overwrite reset_model to make it deterministic
def reset_model(self):

View File

@ -130,10 +130,11 @@ class HopperThrowInBasketEnv(HopperEnvCustomXML):
self.current_step = 0
self.ball_in_basket = False
ret = super().reset(seed=seed, options=options)
if self.context:
self.basket_x = self.np_random.uniform(low=3, high=7, size=1)
self.model.body("basket_ground").pos[:] = [self.basket_x[0], 0, 0]
return super().reset(seed=seed, options=options)
return ret
# overwrite reset_model to make it deterministic
def reset_model(self):

View File

@ -152,8 +152,9 @@ class Walker2dJumpEnv(Walker2dEnvCustomXML):
-> Tuple[ObsType, Dict[str, Any]]:
self.current_step = 0
self.max_height = 0
ret = super().reset(seed=seed, options=options)
self.goal = self.np_random.uniform(1.5, 2.5, 1) # 1.5 3.0
return super().reset(seed=seed, options=options)
return ret
# overwrite reset_model to make it deterministic
def reset_model(self):