hopper throw seeding fixed

2022-07-12 14:18:01 +02:00 · 2022-07-12 14:18:01 +02:00 · 993df10fad
commit 993df10fad
parent 5d4fc4d52f
2 changed files with 23 additions and 21 deletions
--- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw.py
@@ -40,10 +40,10 @@ class ALRHopperThrowEnv(HopperEnv):
                         exclude_current_positions_from_observation)
    def step(self, action):
        self.current_step += 1
        self.do_simulation(action, self.frame_skip)
-        ball_pos_after = self.get_body_com("ball")[0] #abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
+        ball_pos_after = self.get_body_com("ball")[
            0]  # abs(self.get_body_com("ball")[0]) # use x and y to get point and use euclid distance as reward?
        ball_pos_after_y = self.get_body_com("ball")[2]
        # done = self.done TODO We should use this, not sure why there is no other termination; ball_landed should be enough, because we only look at the throw itself? - Paul and Marc
@@ -78,7 +78,7 @@ class ALRHopperThrowEnv(HopperEnv):
    def reset(self):
        self.current_step = 0
-        self.goal = self.goal = np.random.uniform(2.0, 6.0, 1) # 0.5 8.0
+        self.goal = self.goal = self.np_random.uniform(2.0, 6.0, 1)  # 0.5 8.0
        return super().reset()
    # overwrite reset_model to make it deterministic
@@ -94,6 +94,7 @@ class ALRHopperThrowEnv(HopperEnv):
        observation = self._get_obs()
        return observation
 if __name__ == '__main__':
    render_mode = "human"  # "human" or "partial" or "final"
    env = ALRHopperThrowEnv()
--- a/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
+++ b/alr_envs/alr/mujoco/hopper_throw/hopper_throw_in_basket.py
@@ -3,7 +3,6 @@ from gym.envs.mujoco.hopper_v3 import HopperEnv
 import numpy as np
 MAX_EPISODE_STEPS_HOPPERTHROWINBASKET = 250
@@ -57,7 +56,8 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
        basket_center = (basket_pos[0] + 0.5, basket_pos[1], basket_pos[2])
        is_in_basket_x = ball_pos[0] >= basket_pos[0] and ball_pos[0] <= basket_pos[0] + self.basket_size
-        is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size/2) and ball_pos[1] <= basket_pos[1] + (self.basket_size/2)
+        is_in_basket_y = ball_pos[1] >= basket_pos[1] - (self.basket_size / 2) and ball_pos[1] <= basket_pos[1] + (
                    self.basket_size / 2)
        is_in_basket_z = ball_pos[2] < 0.1
        is_in_basket = is_in_basket_x and is_in_basket_y and is_in_basket_z
        if is_in_basket: self.ball_in_basket = True
@@ -84,7 +84,8 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
                    rewards -= (dist * dist)
        else:
            # penalty not needed
-            rewards += ((action[:2] > 0) * self.penalty).sum() if self.current_step < 10 else 0 #too much of a penalty?
+            rewards += ((action[
                         :2] > 0) * self.penalty).sum() if self.current_step < 10 else 0  # too much of a penalty?
        observation = self._get_obs()
        reward = rewards - costs
@@ -106,7 +107,7 @@ class ALRHopperThrowInBasketEnv(HopperEnv):
        self.ball_in_basket = False
        if self.context:
            basket_id = self.sim.model.body_name2id("basket_ground")
-            self.basket_x = np.random.uniform(3, 7, 1)
+            self.basket_x = self.np_random.uniform(3, 7, 1)
            self.sim.model.body_pos[basket_id] = [self.basket_x, 0, 0]
        return super().reset()