From 24604e60be365e38304961c0626b8ad8651f85e7 Mon Sep 17 00:00:00 2001
From: Onur
Date: Thu, 2 Jun 2022 09:05:38 +0200
Subject: [PATCH] beer pong: step-based env with learnable ball release time
 for PPO

---
 alr_envs/alr/__init__.py                   | 11 ++++++
 alr_envs/alr/mujoco/__init__.py            |  2 +-
 alr_envs/alr/mujoco/beerpong/beerpong.py   | 37 +++++++++++++++++--
 alr_envs/alr/mujoco/beerpong/mp_wrapper.py |  3 ++
 .../alr/mujoco/beerpong/new_mp_wrapper.py  |  3 ++
 5 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index 8315a09..978e85c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -435,6 +435,17 @@ register(
     }
 )
 
+# random goal cup position
+register(
+    id='ALRBeerPong-v2',
+    entry_point='alr_envs.alr.mujoco:ALRBeerBongEnvStepBased',
+    max_episode_steps=300,
+    kwargs={
+        "rndm_goal": True,
+        "cup_goal_pos": [-0.3, -1.2],
+        "frame_skip": 2
+    }
+)
 
 # Motion Primitive Environments
 
diff --git a/alr_envs/alr/mujoco/__init__.py b/alr_envs/alr/mujoco/__init__.py
index c02a70f..1cde867 100644
--- a/alr_envs/alr/mujoco/__init__.py
+++ b/alr_envs/alr/mujoco/__init__.py
@@ -2,7 +2,7 @@ from .reacher.balancing import BalancingEnv
 from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
 from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
 from .table_tennis.tt_gym import TTEnvGym
-from .beerpong.beerpong import ALRBeerBongEnv
+from .beerpong.beerpong import ALRBeerBongEnv, ALRBeerBongEnvStepBased
 from .ant_jump.ant_jump import ALRAntJumpEnv
 from .half_cheetah_jump.half_cheetah_jump import ALRHalfCheetahJumpEnv
 from .hopper_jump.hopper_jump import ALRHopperJumpEnv, ALRHopperJumpRndmPosEnv, ALRHopperXYJumpEnv, ALRHopperXYJumpEnvStepBased
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index 8846643..0678da6 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -2,7 +2,7 @@ import mujoco_py.builder
 import os
 
 import numpy as np
-from gym import utils
+from gym import utils, spaces
 from gym.envs.mujoco import MujocoEnv
 
 from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -160,7 +160,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
                      is_collided=is_collided, sim_crash=crash,
                      table_contact_first=int(not self.reward_function.ball_ground_contact_first))
         infos.update(reward_infos)
-
         return ob, reward, done, infos
 
     def check_traj_in_joint_limits(self):
@@ -168,9 +167,16 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
 
     def _get_obs(self):
         theta = self.sim.data.qpos.flat[:7]
+        theta_dot = self.sim.data.qvel.flat[:7]
+        ball_pos = self.sim.data.body_xpos[self.sim.model._body_name2id["ball"]].copy()
+        cup_goal_diff_final = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_final_table"]].copy()
+        cup_goal_diff_top = ball_pos - self.sim.data.site_xpos[self.sim.model._site_name2id["cup_goal_table"]].copy()
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
+            theta_dot,
+            cup_goal_diff_final,
+            cup_goal_diff_top,
             self.sim.model.body_pos[self.cup_table_id][:2].copy(),
             [self._steps],
         ])
@@ -179,14 +185,37 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def dt(self):
         return super(ALRBeerBongEnv, self).dt*self.repeat_action
 
+
+class ALRBeerBongEnvStepBased(ALRBeerBongEnv):
+
+    def _set_action_space(self):
+        bounds = super(ALRBeerBongEnvStepBased, self)._set_action_space()
+        min_bound = np.concatenate(([-1], bounds.low), dtype=bounds.dtype)
+        max_bound = np.concatenate(([1], bounds.high), dtype=bounds.dtype)
+        self.action_space = spaces.Box(low=min_bound, high=max_bound, dtype=bounds.dtype)
+        return self.action_space
+
+    def step(self, a):
+        self.release_step = self._steps if a[0] >= 0 and self.release_step >= self._steps else self.release_step
+        return super(ALRBeerBongEnvStepBased, self).step(a[1:])
+
+    def reset(self):
+        ob = super(ALRBeerBongEnvStepBased, self).reset()
+        self.release_step = self.ep_length + 1
+        return ob
+
 if __name__ == "__main__":
-    env = ALRBeerBongEnv(rndm_goal=True)
+    # env = ALRBeerBongEnv(rndm_goal=True)
+    env = ALRBeerBongEnvStepBased(rndm_goal=True)
     import time
     env.reset()
     env.render("human")
     for i in range(1500):
         # ac = 10 * env.action_space.sample()[0:7]
-        ac = np.zeros(7)
+        ac = np.zeros(8)
+        ac[0] = -1
+        if env._steps > 150:
+            ac[0] = 1
         obs, rew, d, info = env.step(ac)
         env.render("human")
         print(env.dt)
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
index 11af9a5..022490c 100644
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
@@ -12,6 +12,9 @@ class MPWrapper(MPEnvWrapper):
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
+            [False] * 7,  # joint velocities
+            [False] * 3,  # cup_goal_diff_final
+            [False] * 3,  # cup_goal_diff_top
             [True] * 2,  # xy position of cup
             [False]  # env steps
         ])
diff --git a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
index 2bdc11a..2a2d4f9 100644
--- a/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
+++ b/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py
@@ -17,6 +17,9 @@ class NewMPWrapper(EpisodicWrapper):
         return np.hstack([
             [False] * 7,  # cos
             [False] * 7,  # sin
+            [False] * 7,  # joint velocities
+            [False] * 3,  # cup_goal_diff_final
+            [False] * 3,  # cup_goal_diff_top
             [True] * 2,  # xy position of cup
             [False]  # env steps
         ])
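
Note (not part of the patch): a minimal smoke test of the patched env, mirroring the __main__ block above. It assumes the patch is applied and mujoco_py is available. The extra leading action dimension is the release flag: release_step latches to the first step where the flag is >= 0, and it is reset to ep_length + 1 (never release) on reset(). The 150-step release time below is arbitrary, for illustration only.

import numpy as np

from alr_envs.alr.mujoco import ALRBeerBongEnvStepBased

env = ALRBeerBongEnvStepBased(rndm_goal=True)
obs = env.reset()
for i in range(300):  # matches max_episode_steps of ALRBeerPong-v2
    ac = np.zeros(env.action_space.shape[0])  # 8 dims: [release flag, 7 joint commands]
    ac[0] = 1.0 if i >= 150 else -1.0  # flag >= 0 latches the release to this step
    obs, rew, done, info = env.step(ac)
    if done:
        break
env.close()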
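
Since the commit targets PPO, here is a hedged training sketch using stable-baselines3 (a third-party library, not part of this repo). It assumes an SB3 version compatible with the old gym 4-tuple step API, and that importing alr_envs runs the register() calls above so gym.make resolves 'ALRBeerPong-v2'; the timestep budget and save path are illustrative only.

import gym

import alr_envs  # noqa: F401  (side effect: registers ALRBeerPong-v2)
from stable_baselines3 import PPO

env = gym.make("ALRBeerPong-v2")  # -> ALRBeerBongEnvStepBased, 300-step episodes
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=1_000_000)  # training budget is illustrative
model.save("ppo_beerpong_release_time")  # hypothetical output path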