From ce00996782f987242c128b1600c8a96146eb0064 Mon Sep 17 00:00:00 2001
From: Onur
Date: Wed, 13 Jul 2022 09:52:51 +0200
Subject: [PATCH] remove minor attributes from BP

---
 alr_envs/black_box/black_box_wrapper.py     |  2 +-
 alr_envs/black_box/raw_interface_wrapper.py |  7 +------
 alr_envs/envs/__init__.py                   |  7 ++++---
 alr_envs/envs/mujoco/beerpong/beerpong.py   | 23 +++++++++++----------
 alr_envs/envs/mujoco/beerpong/mp_wrapper.py |  2 +-
 alr_envs/utils/time_aware_observation.py    |  4 ++--
 6 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/alr_envs/black_box/black_box_wrapper.py b/alr_envs/black_box/black_box_wrapper.py
index 0ac2d58..6050740 100644
--- a/alr_envs/black_box/black_box_wrapper.py
+++ b/alr_envs/black_box/black_box_wrapper.py
@@ -122,7 +122,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
 
         # TODO remove this part, right now only needed for beer pong
-        mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen)
+        mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
         trajectory, velocity = self.get_trajectory(mp_params)
 
         trajectory_length = len(trajectory)
diff --git a/alr_envs/black_box/raw_interface_wrapper.py b/alr_envs/black_box/raw_interface_wrapper.py
index 019a268..ef93fdf 100644
--- a/alr_envs/black_box/raw_interface_wrapper.py
+++ b/alr_envs/black_box/raw_interface_wrapper.py
@@ -52,12 +52,7 @@ class RawInterfaceWrapper(gym.Wrapper):
         """
         return self.env.dt

-    def do_replanning(self, pos, vel, s, a, t):
-        # return t % 100 == 0
-        # return bool(self.replanning_model(s))
-        return False
-
-    def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
+    def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
         np.ndarray, Union[np.ndarray, None]]:
         """
         Used to extract the parameters for the motion primitive and other parameters from an action array which might
diff --git a/alr_envs/envs/__init__.py b/alr_envs/envs/__init__.py
index 632e1fc..cb2d3ee 100644
--- a/alr_envs/envs/__init__.py
+++ b/alr_envs/envs/__init__.py
@@ -13,6 +13,7 @@ from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
 from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
 from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
 from .mujoco.reacher.reacher import ReacherEnv
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP

@@ -192,7 +193,7 @@ register(
 register(
     id='BeerPong-v0',
     entry_point='alr_envs.envs.mujoco:BeerPongEnv',
-    max_episode_steps=300,
+    max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )
 # Here we use the same reward as in BeerPong-v0, but now consider after the release,
@@ -200,14 +201,14 @@
 register(
     id='BeerPongStepBased-v0',
     entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
-    max_episode_steps=300,
+    max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )

 # Beerpong with episodic reward, but fixed release time step
 register(
     id='BeerPongFixedRelease-v0',
     entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
-    max_episode_steps=300,
+    max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )

 # Motion Primitive Environments
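
All three BeerPong IDs are now registered against one shared episode-length
constant, so downstream code can read the horizon from the env spec instead of
hard-coding 300. A minimal usage sketch (assuming that importing `alr_envs`
triggers the registrations above):

    import gym
    import alr_envs  # noqa: F401  # registers the BeerPong IDs
    from alr_envs.envs import MAX_EPISODE_STEPS_BEERPONG

    env = gym.make('BeerPong-v0')
    # The registry entry is now the single source of truth for the horizon.
    assert env.spec.max_episode_steps == MAX_EPISODE_STEPS_BEERPONG == 300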
diff --git a/alr_envs/envs/mujoco/beerpong/beerpong.py b/alr_envs/envs/mujoco/beerpong/beerpong.py
index b49c180..f92f815 100644
--- a/alr_envs/envs/mujoco/beerpong/beerpong.py
+++ b/alr_envs/envs/mujoco/beerpong/beerpong.py
@@ -6,6 +6,8 @@ import numpy as np
 from gym import utils
 from gym.envs.mujoco import MujocoEnv

+MAX_EPISODE_STEPS_BEERPONG = 300
+
 # XML Variables
 ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
                        "wrist_pitch_link_convex_decomposition_p1_geom",
@@ -28,7 +30,7 @@ CUP_COLLISION_OBJ = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "c


 class BeerPongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, frame_skip=2):
+    def __init__(self):
         self._steps = 0
         # Small Context -> Easier. Todo: Should we do different versions?
         # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -45,9 +47,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self._start_vel = np.zeros(7)

         self.release_step = 100  # time step of ball release
-        self.ep_length = 600 // frame_skip
-        self.repeat_action = frame_skip
+        self.repeat_action = 2

         # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
         self.model = None
         self.site_id = lambda x: self.model.site_name2id(x)
@@ -127,8 +128,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):

         if not crash:
             reward, reward_infos = self._get_reward(applied_action)
-            is_collided = reward_infos['is_collided']
-            done = is_collided or self._steps == self.ep_length - 1
+            is_collided = reward_infos['is_collided']  # TODO: Remove if self collision does not make a difference
+            done = is_collided
             self._steps += 1
         else:
             reward = -30
@@ -182,7 +183,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         # Is this needed?
         # self._is_collided = self._check_collision_with_itself([self.geom_id(name) for name in CUP_COLLISION_OBJ])

-        if self._steps == self.ep_length - 1:  # or self._is_collided:
+        if self._steps == MAX_EPISODE_STEPS_BEERPONG-1:  # or self._is_collided:
             min_dist = np.min(self.dists)
             final_dist = self.dists_final[-1]
             if self.ball_ground_contact_first:
@@ -251,14 +252,14 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):


 class BeerPongEnvFixedReleaseStep(BeerPongEnv):
-    def __init__(self, frame_skip=2):
-        super().__init__(frame_skip)
+    def __init__(self):
+        super().__init__()
         self.release_step = 62  # empirically evaluated for frame_skip=2!


 class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
-    def __init__(self, frame_skip=2):
-        super().__init__(frame_skip)
+    def __init__(self):
+        super().__init__()
         self.release_step = 62  # empirically evaluated for frame_skip=2!

     def step(self, a):
@@ -312,7 +313,7 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):


 if __name__ == "__main__":
-    env = BeerPongEnv(frame_skip=2)
+    env = BeerPongEnv()
     env.seed(0)
     # env = ALRBeerBongEnvStepBased(frame_skip=2)
     # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2)
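
With `done` reduced to the self-collision flag, the env no longer terminates
itself after a fixed step count; the 300-step cap comes from the TimeLimit
wrapper that `gym.make` applies from the registry entry, while the final reward
still fires at step `MAX_EPISODE_STEPS_BEERPONG - 1`. A rollout sketch against
the old 4-tuple gym step API used in this repo (illustrative only):

    import gym
    import alr_envs  # noqa: F401

    env = gym.make('BeerPong-v0')  # TimeLimit(BeerPongEnv), 300 steps
    env.seed(0)
    obs = env.reset()
    done, steps = False, 0
    while not done:
        # done: self-collision, a simulation crash, or the TimeLimit cutoff
        obs, reward, done, info = env.step(env.action_space.sample())
        steps += 1
    assert steps <= 300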
diff --git a/alr_envs/envs/mujoco/beerpong/mp_wrapper.py b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
index 5b53e77..fd79a63 100644
--- a/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
+++ b/alr_envs/envs/mujoco/beerpong/mp_wrapper.py
@@ -28,7 +28,7 @@ class MPWrapper(RawInterfaceWrapper):
         return self.env.data.qvel[0:7].copy()

     # TODO: Fix this
-    def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+    def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
         if mp.learn_tau:
             self.env.env.release_step = action[0] / self.env.dt  # Tau value
         return action, None
diff --git a/alr_envs/utils/time_aware_observation.py b/alr_envs/utils/time_aware_observation.py
index 95d7ab9..92a7e9d 100644
--- a/alr_envs/utils/time_aware_observation.py
+++ b/alr_envs/utils/time_aware_observation.py
@@ -38,7 +38,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
         super().__init__(env)
         assert isinstance(env.observation_space, Box)
         low = np.append(self.observation_space.low, 0.0)
-        high = np.append(self.observation_space.high, np.inf)
+        high = np.append(self.observation_space.high, 1.0)
         self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
         self.t = 0

@@ -51,7 +51,7 @@
         Returns:
             The observation with the time step appended to
         """
-        return np.append(observation, self.t)
+        return np.append(observation, self.t/self.env.spec.max_episode_steps)

     def step(self, action):
         """Steps through the environment, incrementing the time step.
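
The wrapper change also bounds the appended time feature: observations now end
with t / max_episode_steps, so the extra dimension stays in [0, 1] and the new
Box high of 1.0 is tight. A short sketch of the resulting behavior (assuming
the wrapper mirrors gym's TimeAwareObservation reset/step bookkeeping):

    import gym
    import alr_envs  # noqa: F401
    from alr_envs.utils.time_aware_observation import TimeAwareObservation

    env = TimeAwareObservation(gym.make('BeerPong-v0'))
    obs = env.reset()
    assert obs[-1] == 0.0           # t = 0 at reset
    obs, reward, done, info = env.step(env.action_space.sample())
    assert 0.0 < obs[-1] <= 1.0     # t / spec.max_episode_steps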