From 92d05a9dfd32f6cd351666821ebed54cea0e6946 Mon Sep 17 00:00:00 2001
From: Maximilian Huettenrauch
Date: Tue, 7 Dec 2021 14:46:31 +0100
Subject: [PATCH] small bp and tt updates

---
 alr_envs/alr/__init__.py                      | 51 ++++++++++---------
 alr_envs/alr/mujoco/beerpong/beerpong.py      | 28 +++++-----
 .../mujoco/beerpong/beerpong_reward_staged.py |  2 +-
 alr_envs/alr/mujoco/table_tennis/tt_gym.py    | 14 +++--
 alr_envs/alr/mujoco/table_tennis/tt_reward.py |  2 +-
 5 files changed, 55 insertions(+), 42 deletions(-)

diff --git a/alr_envs/alr/__init__.py b/alr_envs/alr/__init__.py
index e2ba068..90ec78c 100644
--- a/alr_envs/alr/__init__.py
+++ b/alr_envs/alr/__init__.py
@@ -204,7 +204,7 @@ register(id='TableTennis2DCtxt-v0',
 
 register(id='TableTennis2DCtxt-v1',
          entry_point='alr_envs.alr.mujoco:TTEnvGym',
-         max_episode_steps=1750,
+         max_episode_steps=MAX_EPISODE_STEPS,
          kwargs={'ctxt_dim': 2, 'fixed_goal': True})
 
 register(id='TableTennis4DCtxt-v0',
@@ -365,29 +365,32 @@ for _v in _versions:
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ## Beerpong
-register(
-    id='BeerpongProMP-v0',
-    entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
-    kwargs={
-        "name": "alr_envs:ALRBeerPong-v0",
-        "wrappers": [mujoco.beerpong.MPWrapper],
-        "mp_kwargs": {
-            "num_dof": 7,
-            "num_basis": 2,
-            "duration": 1,
-            "post_traj_time": 2,
-            "policy_type": "motor",
-            "weights_scale": 0.2,
-            "zero_start": True,
-            "zero_goal": False,
-            "policy_kwargs": {
-                "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
-                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+_versions = ["v0", "v1", "v2", "v3"]
+for _v in _versions:
+    _env_id = f'BeerpongProMP-{_v}'
+    register(
+        id=_env_id,
+        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        kwargs={
+            "name": f"alr_envs:ALRBeerPong-{_v}",
+            "wrappers": [mujoco.beerpong.MPWrapper],
+            "mp_kwargs": {
+                "num_dof": 7,
+                "num_basis": 2,
+                "duration": 1,
+                "post_traj_time": 2,
+                "policy_type": "motor",
+                "weights_scale": 1,
+                "zero_start": True,
+                "zero_goal": False,
+                "policy_kwargs": {
+                    "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                    "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+                }
             }
         }
-    }
-)
-ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("BeerpongProMP-v0")
+    )
+    ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
 ## Table Tennis
 ctxt_dim = [2, 4]
@@ -429,7 +432,9 @@ register(
             "duration": 1.,
             "post_traj_time": 2.5,
             "policy_type": "motor",
-            "weights_scale": 0.2,
+            "weights_scale": 1,
+            "off": -0.05,
+            "bandwidth_factor": 3.5,
             "zero_start": True,
             "zero_goal": False,
             "policy_kwargs": {
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong.py b/alr_envs/alr/mujoco/beerpong/beerpong.py
index a86f0a7..755710a 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong.py
@@ -127,24 +127,28 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             self._steps += 1
         else:
             reward = -30
+            reward_infos = dict()
             success = False
             is_collided = False
             done = True
             ball_pos = np.zeros(3)
             ball_vel = np.zeros(3)
 
-        return ob, reward, done, dict(reward_dist=reward_dist,
-                                      reward_ctrl=reward_ctrl,
-                                      reward=reward,
-                                      velocity=angular_vel,
-                                      # traj=self._q_pos,
-                                      action=a,
-                                      q_pos=self.sim.data.qpos[0:7].ravel().copy(),
-                                      q_vel=self.sim.data.qvel[0:7].ravel().copy(),
-                                      ball_pos=ball_pos,
-                                      ball_vel=ball_vel,
-                                      success=success,
-                                      is_collided=is_collided, sim_crash=crash)
+        infos = dict(reward_dist=reward_dist,
+                     reward_ctrl=reward_ctrl,
+                     reward=reward,
+                     velocity=angular_vel,
+                     # traj=self._q_pos,
+                     action=a,
+                     q_pos=self.sim.data.qpos[0:7].ravel().copy(),
+                     q_vel=self.sim.data.qvel[0:7].ravel().copy(),
+                     ball_pos=ball_pos,
+                     ball_vel=ball_vel,
+                     success=success,
+                     is_collided=is_collided, sim_crash=crash)
+        infos.update(reward_infos)
+
+        return ob, reward, done, infos
 
     def check_traj_in_joint_limits(self):
         return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
diff --git a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
index d64f179..e94b470 100644
--- a/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
+++ b/alr_envs/alr/mujoco/beerpong/beerpong_reward_staged.py
@@ -110,7 +110,7 @@ class BeerPongReward:
             success = ball_in_cup
             crash = self._is_collided
         else:
-            reward = - 1e-4 * action_cost
+            reward = - 1e-2 * action_cost
             success = False
             crash = False
 
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
index f42c4c7..d1c2dc3 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
@@ -10,7 +10,7 @@ from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
 
 #TODO: Check for simulation stability. Make sure the code runs even for sim crash
 
-MAX_EPISODE_STEPS = 2875
+MAX_EPISODE_STEPS = 1750
 BALL_NAME_CONTACT = "target_ball_contact"
 BALL_NAME = "target_ball"
 TABLE_NAME = 'table_tennis_table'
@@ -42,9 +42,10 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
         else:
             raise ValueError("either 2 or 4 dimensional Contexts available")
 
-        action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
-        action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
-        self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
+        # has no effect as it is overwritten in init of super
+        # action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
+        # action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
+        # self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
 
         self.time_steps = 0
         self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
@@ -159,7 +160,10 @@ class TTEnvGym(MujocoEnv, utils.EzPickle):
             done = True
             reward = -25
         ob = self._get_obs()
-        return ob, reward, done, {"hit_ball": self.hit_ball}  # might add some information here ....
+        info = {"hit_ball": self.hit_ball,
+                "q_pos": np.copy(self.sim.data.qpos[:7]),
+                "ball_pos": np.copy(self.sim.data.qpos[7:])}
+        return ob, reward, done, info  # might add some information here ....
 
     def set_context(self, context):
         old_state = self.sim.get_state()
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_reward.py b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
index eab2dd3..0e1bebf 100644
--- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py
+++ b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
@@ -19,7 +19,7 @@ class TT_Reward:
         # # seems to work for episodic case
         min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
         if not hited_ball:
-            return 0.2 * (1- np.tanh(min_r_b_dist**2))
+            return 0.2 * (1 - np.tanh(min_r_b_dist**2))
         else:
            if ball_landing_pos is None:
                min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
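
Usage note (not part of the patch): a minimal sketch of how one of the ProMP variants registered by the new loop could be instantiated. It assumes alr_envs is installed, that importing the package executes the register() calls above, and that the ProMP wrapper follows the usual Gym step interface; the random weight sampling is purely illustrative, not a meaningful policy.

import gym

import alr_envs  # importing the package runs the register() calls patched above

# 'BeerpongProMP-v0' is one of the ids registered by the new _versions loop.
env = gym.make('BeerpongProMP-v0')
obs = env.reset()

# Sample a random ProMP weight vector just to exercise the interface;
# a learned policy would provide these weights instead.
weights = env.action_space.sample()
obs, reward, done, info = env.step(weights)
print(reward, info)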