update bp step based env

Onur 2022-06-04 17:43:35 +02:00
parent 8b8be4b582
commit 719b40c4e4
3 changed files with 36 additions and 2 deletions

View File

@@ -446,6 +446,17 @@ register(
         "frame_skip": 2
     }
 )
+# Beerpong with episodic reward, but fixed release time step
+register(
+    id='ALRBeerPong-v3',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Motion Primitive Environments
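
A minimal usage sketch for the newly registered id (an assumption based on how the other ids in this file are consumed: importing alr_envs is assumed to run the register() calls above; the random-action rollout is only illustrative):

import gym
import alr_envs  # assumed to trigger the register() calls on import

env = gym.make('ALRBeerPong-v3')
obs = env.reset()
done = False
while not done:
    # sample random actions until the TimeLimit or the env signals done
    obs, reward, done, info = env.step(env.action_space.sample())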

View File

@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased

View File

@@ -186,6 +186,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return super(ALRBeerBongEnv, self).dt*self.repeat_action
 
+class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+        super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+        self.release_step = 62  # empirically evaluated for frame_skip=2!
+
+    def step(self, a):
+        if self._steps < self.release_step:
+            return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+        else:
+            reward = 0
+            done = False
+            while not done:
+                sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+                reward += sub_reward
+            infos = sub_infos
+            ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus, the observation also needs to be set correctly
+            return ob, reward, done, infos
+
+
 class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
@@ -230,13 +250,16 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
                     reward = reward - sub_infos['action_cost'] + dist_rew
                     infos = sub_infos
                     ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus, the observation also needs to be set correctly
             return ob, reward, done, infos
 
 
 if __name__ == "__main__":
     # env = ALRBeerBongEnv(rndm_goal=True)
-    env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
     import time
     env.reset()
     env.render("human")