Beer pong step based -> release time for PPO

Onur 2022-06-02 09:05:38 +02:00
parent 59b15e82ea
commit 24604e60be
5 changed files with 51 additions and 5 deletions

View File

@@ -435,6 +435,17 @@ register(
}
)
# random goal cup position
register(
id='ALRBeerPong-v2',
entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
max_episode_steps=300,
kwargs={
"rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
"frame_skip": 2
}
)
# Motion Primitive Environments

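With this registration in place, the step-based variant should be reachable through the standard Gym factory; a minimal sketch, assuming that importing the package executes the register() calls shown above:

import gym
import alr_envs  # assumed to run the register() calls on import

env = gym.make('ALRBeerPong-v2')  # step-based beer pong with random cup goal
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()
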
View File

@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
from .table_tennis.tt_gym import TTEnvGym
from .beerpong.beerpong import ALRBeerBongEnv
from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
from .ant_jump.ant_jump import ALRAntJumpEnv
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased

View File

@@ -2,7 +2,7 @@ import mujoco_py.builder
import os
import numpy as np
from gym import utils
from gym import utils, spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -160,7 +160,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
is_collided=is_collided, sim_crash=crash,
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
infos.update(reward_infos)
return ob, reward, done, infos
def check_traj_in_joint_limits(self):
@@ -168,9 +167,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
theta_dot = self.sim.data.qvel.flat[:7]
ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_final_table"]].copy()
cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
return np.concatenate([
np.cos(theta),
np.sin(theta),
theta_dot,
cup_goal_diff_final,
cup_goal_diff_top,
self.sim.model.body_pos[self.cup_table_id][:2].copy(),
[self._steps],
])
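With the two ball-to-goal difference vectors included, the observation assembled here comes out at 30 entries; a quick dimension check (pure arithmetic, assuming the return statement above is the complete observation):

# cos + sin + joint velocities + goal diff (final) + goal diff (top) + cup xy + step counter
obs_dim = 7 + 7 + 7 + 3 + 3 + 2 + 1
assert obs_dim == 30
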
@@ -179,14 +185,37 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def dt(self):
return super(ALRBeerBongEnv, self).dt*self.repeat_action
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
def _set_action_space(self):
bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
return self.action_space
def step(self, a):
self.release_step = self._steps if a[0]>=0 and self.release_step >= self._steps else self.release_step
return super(ALRBeerBongEnvStepBased, self).step(a[1:])
def reset(self):
ob = super(ALRBeerBongEnvStepBased, self).reset()
self.release_step = self.ep_length + 1
return ob
if __name__ == "__main__":
env = ALRBeerBongEnv(rndm_goal=True)
# env = ALRBeerBongEnv(rndm_goal=True)
env = ALRBeerBongEnvStepBased(rndm_goal=True)
import time
env.reset()
env.render("human")
for i in range(1500):
# ac = 10 * env.action_space.sample()[0:7]
ac = np.zeros(7)
ac = np.zeros(8)
ac[0] = -1
if env._steps > 150:
ac[0] = 1
obs, rew, d, info = env.step(ac)
env.render("human")
print(env.dt)
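In the step-based variant the action vector gains a leading release flag: _set_action_space() prepends a [-1, 1] dimension to the inherited joint bounds, step() latches release_step to the current step the first time a[0] >= 0, and reset() pushes release_step past the episode end so the ball is held until the policy asks for release. A hedged rollout sketch against this interface, with a random policy standing in for PPO:

env = ALRBeerBongEnvStepBased(rndm_goal=True)
obs = env.reset()
done = False
while not done:
    a = env.action_space.sample()  # a[0]: release flag in [-1, 1], a[1:]: 7 joint commands
    # the ball is released at the first step where a[0] >= 0; later flags have no effect
    obs, reward, done, info = env.step(a)
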

View File

@@ -12,6 +12,9 @@ class MPWrapper(MPEnvWrapper):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
[False] * 7, # joint velocities
[False] * 3, # cup_goal_diff_final
[False] * 3, # cup_goal_diff_top
[True] * 2, # xy position of cup
[False] # env steps
])

View File

@@ -17,6 +17,9 @@ class NewMPWrapper(EpisodicWrapper):
return np.hstack([
[False] * 7, # cos
[False] * 7, # sin
[False] * 7, # joint velocities
[False] * 3, # cup_goal_diff_final
[False] * 3, # cup_goal_diff_top
[True] * 2, # xy position of cup
[False] # env steps
])
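Both wrapper hunks mark only the cup's xy position as active, so the boolean mask reduces the full 30-dimensional observation to a 2-dimensional context. A self-contained illustration of how such a mask selects the context features (names are illustrative):

import numpy as np

mask = np.hstack([[False] * 7, [False] * 7, [False] * 7,
                  [False] * 3, [False] * 3, [True] * 2, [False]])
obs = np.arange(mask.size, dtype=float)  # stand-in for a full 30-dim observation
context = obs[mask]                      # keeps only the cup's xy entries
assert context.shape == (2,)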