update bp step based env
commit 719b40c4e4
parent 8b8be4b582
@@ -446,6 +446,17 @@ register(
         "frame_skip": 2
     }
 )
+# Beerpong with episodic reward, but fixed release time step
+register(
+    id='ALRBeerPong-v3',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBasedEpisodicReward',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Motion Primitive Environments
 
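For reference, a minimal usage sketch of the newly registered id, assuming that importing alr_envs executes the register() calls above (standard gym registration semantics otherwise):

import gym
import alr_envs  # assumption: importing the package runs the register() calls shown above

env = gym.make('ALRBeerPong-v3')
obs = env.reset()
# one step with a random action, purely for illustration
obs, reward, done, info = env.step(env.action_space.sample())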
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased, ALRBeerBongEnvStepBasedEpisodicReward
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
@@ -186,6 +186,26 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return super(ALRBeerBongEnv, self).dt*self.repeat_action
 
 
+class ALRBeerBongEnvStepBasedEpisodicReward(ALRBeerBongEnv):
+    def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
+        super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
+        self.release_step = 62  # empirically evaluated for frame_skip=2!
+
+    def step(self, a):
+        if self._steps < self.release_step:
+            return super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(a)
+        else:
+            reward = 0
+            done = False
+            while not done:
+                sub_ob, sub_reward, done, sub_infos = super(ALRBeerBongEnvStepBasedEpisodicReward, self).step(np.zeros(a.shape))
+                reward += sub_reward
+            infos = sub_infos
+            ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus, the observation also needs to be set correctly
+            return ob, reward, done, infos
+
 class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False, rndm_goal=False, cup_goal_pos=None):
         super().__init__(frame_skip, apply_gravity_comp, noisy, rndm_goal, cup_goal_pos)
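A minimal rollout sketch of the new class, assuming the import path from the entry_point above (alr_envs.alr.mujoco). It illustrates the intended behaviour: once self._steps reaches release_step, a single external step() call internally simulates the remaining episode with zero actions and returns the accumulated episodic reward, so the outer loop finishes after roughly release_step + 1 external steps:

import numpy as np
from alr_envs.alr.mujoco import ALRBeerBongEnvStepBasedEpisodicReward

env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
obs = env.reset()
done = False
total_reward, external_steps = 0.0, 0
while not done:
    # zero torques, purely for illustration
    obs, reward, done, info = env.step(np.zeros(env.action_space.shape))
    total_reward += reward
    external_steps += 1
# the final call aggregates all internally simulated sub-steps,
# so external_steps should end up close to env.release_step + 1
print(external_steps, total_reward)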
@@ -230,13 +250,16 @@ class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
                 reward = reward - sub_infos['action_cost'] + dist_rew
             infos = sub_infos
             ob = sub_ob
+            ob[-1] = self.release_step + 1  # Since we simulate until the end of the episode, PPO does not see the
+            # internal steps and thus, the observation also needs to be set correctly
             return ob, reward, done, infos
 
 
 
 if __name__ == "__main__":
     # env = ALRBeerBongEnv(rndm_goal=True)
-    env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    # env = ALRBeerBongEnvStepBased(frame_skip=2, rndm_goal=True)
+    env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2, rndm_goal=True)
     import time
     env.reset()
     env.render("human")