BeerPong step-based env: add release-time action for PPO
This commit is contained in:
parent
59b15e82ea
commit
24604e60be
@ -435,6 +435,17 @@ register(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# random goal cup position
|
||||||
|
# Step-based BeerPong variant (entry point: ALRBeerBongEnvStepBased) with a
# randomized goal-cup position; episodes are capped at 300 environment steps.
register(
    id='ALRBeerPong-v2',
    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
    max_episode_steps=300,
    # Keyword arguments forwarded to the environment constructor.
    kwargs=dict(
        rndm_goal=True,
        cup_goal_pos=[-0.3, -1.2],
        frame_skip=2,
    ),
)
|
||||||
|
|
||||||
# Motion Primitive Environments
|
# Motion Primitive Environments
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
|
|||||||
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||||
from .table_tennis.tt_gym import TTEnvGym
|
from .table_tennis.tt_gym import TTEnvGym
|
||||||
from .beerpong.beerpong import ALRBeerBongEnv
|
from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
|
||||||
from .ant_jump.ant_jump import ALRAntJumpEnv
|
from .ant_jump.ant_jump import ALRAntJumpEnv
|
||||||
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
|
from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
|
||||||
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
|
from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
|
||||||
|
@ -2,7 +2,7 @@ import mujoco_py.builder
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from gym import utils
|
from gym import utils, spaces
|
||||||
from gym.envs.mujoco import MujocoEnv
|
from gym.envs.mujoco import MujocoEnv
|
||||||
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
|
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
|
||||||
|
|
||||||
@ -160,7 +160,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
is_collided=is_collided, sim_crash=crash,
|
is_collided=is_collided, sim_crash=crash,
|
||||||
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
|
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
|
||||||
infos.update(reward_infos)
|
infos.update(reward_infos)
|
||||||
|
|
||||||
return ob, reward, done, infos
|
return ob, reward, done, infos
|
||||||
|
|
||||||
def check_traj_in_joint_limits(self):
|
def check_traj_in_joint_limits(self):
|
||||||
@ -168,9 +167,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
theta = self.sim.data.qpos.flat[:7]
|
theta = self.sim.data.qpos.flat[:7]
|
||||||
|
theta_dot = self.sim.data.qvel.flat[:7]
|
||||||
|
ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
|
||||||
|
cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_final_table"]].copy()
|
||||||
|
cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
|
||||||
return np.concatenate([
|
return np.concatenate([
|
||||||
np.cos(theta),
|
np.cos(theta),
|
||||||
np.sin(theta),
|
np.sin(theta),
|
||||||
|
theta_dot,
|
||||||
|
cup_goal_diff_final,
|
||||||
|
cup_goal_diff_top,
|
||||||
self.sim.model.body_pos[self.cup_table_id][:2].copy(),
|
self.sim.model.body_pos[self.cup_table_id][:2].copy(),
|
||||||
[self._steps],
|
[self._steps],
|
||||||
])
|
])
|
||||||
@ -179,14 +185,37 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
|
|||||||
def dt(self):
|
def dt(self):
|
||||||
return super(ALRBeerBongEnv, self).dt*self.repeat_action
|
return super(ALRBeerBongEnv, self).dt*self.repeat_action
|
||||||
|
|
||||||
|
|
||||||
|
class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
    """BeerPong environment whose action carries a leading release signal.

    The action vector is the parent environment's action with one extra
    entry prepended. That first entry is bounded to [-1, 1]; a non-negative
    value stores the current step counter in ``release_step``. The remaining
    entries are forwarded unchanged to the parent ``step``.

    NOTE(review): how ``release_step`` is consumed (presumably by the parent
    class to decide when the ball is released) is not visible here -- confirm
    against ``ALRBeerBongEnv``.
    """

    def _set_action_space(self):
        """Extend the parent's action space by one leading dimension in [-1, 1].

        Returns:
            The new ``spaces.Box``, which is also stored in ``self.action_space``.
        """
        parent_space = super()._set_action_space()
        dtype = parent_space.dtype
        lower = np.concatenate(([-1], parent_space.low), dtype=dtype)
        upper = np.concatenate(([1], parent_space.high), dtype=dtype)
        self.action_space = spaces.Box(low=lower, high=upper, dtype=dtype)
        return self.action_space

    def step(self, a):
        """Update ``release_step`` from ``a[0]`` and step the parent with ``a[1:]``.

        Args:
            a: Action whose first entry is the release signal and whose
                remaining entries form the parent environment's action.

        Returns:
            Whatever the parent ``step`` returns for ``a[1:]``.
        """
        release_requested = a[0] >= 0
        release_step = self.release_step
        # Only overwrite while the stored release step is not behind the
        # current step counter; otherwise the stored value is kept.
        if release_requested and release_step >= self._steps:
            release_step = self._steps
        self.release_step = release_step
        return super().step(a[1:])

    def reset(self):
        """Reset the parent environment and re-arm the release step.

        Returns:
            The observation returned by the parent ``reset``.
        """
        observation = super().reset()
        # One past the episode length, so the comparison in ``step`` holds
        # until a non-negative release signal arrives.
        self.release_step = self.ep_length + 1
        return observation
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
env = ALRBeerBongEnv(rndm_goal=True)
|
# env = ALRBeerBongEnv(rndm_goal=True)
|
||||||
|
env = ALRBeerBongEnvStepBased(rndm_goal=True)
|
||||||
import time
|
import time
|
||||||
env.reset()
|
env.reset()
|
||||||
env.render("human")
|
env.render("human")
|
||||||
for i in range(1500):
|
for i in range(1500):
|
||||||
# ac = 10 * env.action_space.sample()[0:7]
|
# ac = 10 * env.action_space.sample()[0:7]
|
||||||
ac = np.zeros(7)
|
ac = np.zeros(8)
|
||||||
|
ac[0] = -1
|
||||||
|
if env._steps > 150:
|
||||||
|
ac[0] = 1
|
||||||
obs, rew, d, info = env.step(ac)
|
obs, rew, d, info = env.step(ac)
|
||||||
env.render("human")
|
env.render("human")
|
||||||
print(env.dt)
|
print(env.dt)
|
||||||
|
@ -12,6 +12,9 @@ class MPWrapper(MPEnvWrapper):
|
|||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * 7, # cos
|
[False] * 7, # cos
|
||||||
[False] * 7, # sin
|
[False] * 7, # sin
|
||||||
|
[False] * 7, # joint velocities
|
||||||
|
[False] * 3, # cup_goal_diff_final
|
||||||
|
[False] * 3, # cup_goal_diff_top
|
||||||
[True] * 2, # xy position of cup
|
[True] * 2, # xy position of cup
|
||||||
[False] # env steps
|
[False] # env steps
|
||||||
])
|
])
|
||||||
|
@ -17,6 +17,9 @@ class NewMPWrapper(EpisodicWrapper):
|
|||||||
return np.hstack([
|
return np.hstack([
|
||||||
[False] * 7, # cos
|
[False] * 7, # cos
|
||||||
[False] * 7, # sin
|
[False] * 7, # sin
|
||||||
|
[False] * 7, # joint velocities
|
||||||
|
[False] * 3, # cup_goal_diff_final
|
||||||
|
[False] * 3, # cup_goal_diff_top
|
||||||
[True] * 2, # xy position of cup
|
[True] * 2, # xy position of cup
|
||||||
[False] # env steps
|
[False] # env steps
|
||||||
])
|
])
|
||||||
|
Loading…
Reference in New Issue
Block a user