bp step based -> release time for PPO

Onur 2022-06-02 09:05:38 +02:00
parent 59b15e82ea
commit 24604e60be
5 changed files with 51 additions and 5 deletions


@@ -435,6 +435,17 @@ register(
     }
 )
 
+# random goal cup position
+register(
+    id='ALRBeerPong-v2',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 # Motion Primitive Environments
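For context, a minimal usage sketch of the newly registered step-based environment (assuming, as is usual for this package, that importing alr_envs runs the register() calls above):

import gym
import alr_envs  # noqa: F401 -- importing triggers environment registration

env = gym.make('ALRBeerPong-v2')
obs = env.reset()
# The step-based variant prepends a release flag to the seven joint actions,
# so sampled actions are 8-dimensional (see _set_action_space() below).
obs, reward, done, info = env.step(env.action_space.sample())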


@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased


@@ -2,7 +2,7 @@ import mujoco_py.builder
 import os
 import numpy as np
-from gym import utils
+from gym import utils, spaces
 from gym.envs.mujoco import MujocoEnv
 from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -160,7 +160,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
                      is_collided=is_collided, sim_crash=crash,
                      table_contact_first=int(not self.reward_function.ball_ground_contact_first))
         infos.update(reward_infos)
-
         return ob, reward, done, infos
 
     def check_traj_in_joint_limits(self):
@@ -168,9 +167,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
+        theta_dot = self.sim.data.qvel.flat[:7]
+        ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
+        cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_final_table"]].copy()
+        cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
+            theta_dot,
+            cup_goal_diff_final,
+            cup_goal_diff_top,
             self.sim.model.body_pos[self.cup_table_id][:2].copy(),
             [self._steps],
         ])
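_get_obs() gains 13 entries here: seven joint velocities plus the ball's offset to two cup sites (3 each). A quick dimensional check of the resulting layout (illustrative arithmetic only, not part of the commit):

# cos(theta)            7
# sin(theta)            7
# theta_dot             7
# cup_goal_diff_final   3
# cup_goal_diff_top     3
# cup xy position       2
# step counter          1
obs_dim = 7 + 7 + 7 + 3 + 3 + 2 + 1
assert obs_dim == 30  # the wrapper masks in the last two files must match this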
@@ -179,14 +185,37 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def dt(self):
         return super(ALRBeerBongEnv, self).dt*self.repeat_action
 
+
+class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
+    def _set_action_space(self):
+        bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
+        min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
+        max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
+        self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
+        return self.action_space
+
+    def step(self, a):
+        self.release_step = self._steps if a[0] >= 0 and self.release_step >= self._steps else self.release_step
+        return super(ALRBeerBongEnvStepBased, self).step(a[1:])
+
+    def reset(self):
+        ob = super(ALRBeerBongEnvStepBased, self).reset()
+        self.release_step = self.ep_length + 1
+        return ob
+
+
 if __name__ == "__main__":
-    env = ALRBeerBongEnv(rndm_goal=True)
+    # env = ALRBeerBongEnv(rndm_goal=True)
+    env = ALRBeerBongEnvStepBased(rndm_goal=True)
     import time
     env.reset()
     env.render("human")
     for i in range(1500):
         # ac = 10 * env.action_space.sample()[0:7]
-        ac = np.zeros(7)
+        ac = np.zeros(8)
+        ac[0] = -1
+        if env._steps > 150:
+            ac[0] = 1
         obs, rew, d, info = env.step(ac)
         env.render("human")
         print(env.dt)
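The ternary in ALRBeerBongEnvStepBased.step() latches the release time: reset() initializes release_step past the episode end, the first step whose flag a[0] is non-negative pins release_step to the current step, and every later flag is ignored because release_step then falls below self._steps. An equivalent plain-if rewrite, shown only for readability (hypothetical, not part of the commit):

class ALRBeerBongEnvStepBasedReadable(ALRBeerBongEnv):  # hypothetical name
    def step(self, a):
        # Latch: only the first non-negative release flag takes effect.
        if a[0] >= 0 and self.release_step >= self._steps:
            self.release_step = self._steps
        return super().step(a[1:])  # the remaining 7 entries drive the arm

This is what the commit title refers to: a step-based learner such as PPO can now choose the ball's release time through an ordinary action dimension.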


@@ -12,6 +12,9 @@ class MPWrapper(MPEnvWrapper):
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
+            [False] * 7,  # joint velocities
+            [False] * 3,  # cup_goal_diff_final
+            [False] * 3,  # cup_goal_diff_top
             [True] * 2,  # xy position of cup
             [False]  # env steps
         ])
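Both wrapper masks (this MPWrapper and the NewMPWrapper in the next file) must stay aligned with the 30-dimensional observation from _get_obs(); only the cup's xy position is marked active, i.e. exposed as context to the motion-primitive policy. A hypothetical illustration of how such a mask selects the context vector:

import numpy as np

mask = np.hstack([
    [False] * 7,  # cos
    [False] * 7,  # sin
    [False] * 7,  # joint velocities
    [False] * 3,  # cup_goal_diff_final
    [False] * 3,  # cup_goal_diff_top
    [True] * 2,   # xy position of cup
    [False],      # env steps
])
assert mask.shape == (30,)  # must match the new observation length

obs = np.zeros(30)   # placeholder observation
context = obs[mask]  # 2-dim context (cup xy) for the MP policy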


@@ -17,6 +17,9 @@ class NewMPWrapper(EpisodicWrapper):
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
+            [False] * 7,  # joint velocities
+            [False] * 3,  # cup_goal_diff_final
+            [False] * 3,  # cup_goal_diff_top
             [True] * 2,  # xy position of cup
             [False]  # env steps
         ])