update bp step based env
parent 8b8be4b582
commit 719b40c4e4
@@ -446,6 +446,17 @@ register(
         "frame_skip": 2
     }
 )
+# Beerpong with episodic reward, but fixed release time step
+register(
+    id='ALRBeerPong-v3',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Motion Primitive Environments
 
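For reference, a minimal usage sketch of the newly registered environment. It assumes, as is usual for this package, that importing alr_envs executes the register() calls above; the rollout itself is purely illustrative.

import gym
import alr_envs  # noqa: F401 -- importing the package runs the register() calls

env = gym.make('ALRBeerPong-v3')
ob = env.reset()
done, total_reward = False, 0.0
while not done:
    # random actions purely for illustration
    ob, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
print(total_reward)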
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
@@ -186,6 +186,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return super(ALRBeerBongEnv, self).dt*self.repeat_action
 
 
+class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+        super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+        self.release_step = 62  # empirically evaluated for frame_skip=2!
+
+    def step(self, a):
+        if self._steps < self.release_step:
+            return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+        else:
+            reward = 0
+            done = False
+            while not done:
+                sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+                reward += sub_reward
+            infos = sub_infos
+            ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus the observation also needs to be set correctly
+            return ob, reward, done, infos
+
 class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
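The idea of this variant: the policy controls the arm only up to the fixed release step; from there the environment internally simulates the remainder of the episode with zero actions and hands back the summed (episodic) reward in a single transition. A minimal sketch of what a step-based learner therefore observes; the class name comes from the diff above, and the expected count of 63 agent-visible steps is an assumption following release_step = 62.

import numpy as np

env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
env.reset()
done, steps = False, 0
while not done:
    # zero actions purely for illustration
    ob, reward, done, info = env.step(np.zeros(env.action_space.shape))
    steps += 1
print(steps)  # assumed: release_step + 1 = 63 steps, with the accumulated remainder of the reward in the last one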
@@ -230,13 +250,16 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
                 reward = reward - sub_infos['action_cost'] + dist_rew
                 infos = sub_infos
                 ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus the observation also needs to be set correctly
             return ob, reward, done, infos
 
 
 
 if __name__ == "__main__":
     # env = ALRBeerBongEnv(rndm_goal=True)
-    env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
     import time
     env.reset()
     env.render("human")
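A hypothetical continuation of this manual test, in the style of the render loop such scripts usually end with; it is not part of the commit, and it assumes np and time are available per the imports in this file.

done = False
while not done:
    ac = 0.05 * np.random.randn(env.action_space.shape[0])  # small random actions
    ob, reward, done, info = env.step(ac)
    env.render("human")
    time.sleep(env.dt)  # dt property as defined in the diff above
env.close()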