update bp step based env
commit 719b40c4e4
parent 8b8be4b582
@@ -446,6 +446,17 @@ register(
         "frame_skip": 2
     }
 )
+# Beerpong with episodic reward, but fixed release time step
+register(
+    id='ALRBeerPong-v3',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Motion Primitive Environments
 
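For reference, a minimal usage sketch of the newly registered id, assuming that importing alr_envs executes the register() calls above (standard gym registration semantics otherwise):

import gym
import alr_envs  # assumption: importing the package runs the register() calls shown above

env = gym.make('ALRBeerPong-v3')
obs = env.reset()
# one step with a random action, purely for illustration
obs, reward, done, info = env.step(env.action_space.sample())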
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
@@ -186,6 +186,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return super(ALRBeerBongEnv, self).dt*self.repeat_action
 
 
+class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+        super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+        self.release_step = 62  # empirically evaluated for frame_skip=2!
+
+    def step(self, a):
+        if self._steps < self.release_step:
+            return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+        else:
+            reward = 0
+            done = False
+            while not done:
+                sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+                reward += sub_reward
+            infos = sub_infos
+            ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus, the observation also needs to be set correctly
+            return ob, reward, done, infos
+
 class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
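A minimal rollout sketch of the new class, assuming the import path from the entry_point above (alr_envs.alr.mujoco). It illustrates the intended behaviour: once self._steps reaches release_step, a single external step() call internally simulates the remaining episode with zero actions and returns the accumulated episodic reward, so the outer loop finishes after roughly release_step + 1 external steps:

import numpy as np
from alr_envs.alr.mujoco import ALRBeerBongEnvStepBasedEpisodicReward

env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
obs = env.reset()
done = False
total_reward, external_steps = 0.0, 0
while not done:
    # zero torques, purely for illustration
    obs, reward, done, info = env.step(np.zeros(env.action_space.shape))
    total_reward += reward
    external_steps += 1
# the final call aggregates all internally simulated sub-steps,
# so external_steps should end up close to env.release_step + 1
print(external_steps, total_reward)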
@@ -230,13 +250,16 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
                 reward = reward - sub_infos['action_cost'] + dist_rew
             infos = sub_infos
             ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus, the observation also needs to be set correctly
             return ob, reward, done, infos
 
 
 
 if __name__ == "__main__":
     # env = ALRBeerBongEnv(rndm_goal=True)
-    env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
     import time
     env.reset()
     env.render("human")