remove minor attributes from BP

This commit is contained in:
Onur 2022-07-13 09:52:51 +02:00
parent 14ee580473
commit ce00996782
6 changed files with 21 additions and 24 deletions

View File

@@ -122,7 +122,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step""" """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO remove this part, right now only needed for beer pong # TODO remove this part, right now only needed for beer pong
mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen) mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
trajectory, velocity = self.get_trajectory(mp_params) trajectory, velocity = self.get_trajectory(mp_params)
trajectory_length = len(trajectory) trajectory_length = len(trajectory)

View File

@@ -52,12 +52,7 @@ class RawInterfaceWrapper(gym.Wrapper):
""" """
return self.env.dt return self.env.dt
def do_replanning(self, pos, vel, s, a, t): def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
# return t % 100 == 0
# return bool(self.replanning_model(s))
return False
def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
np.ndarray, Union[np.ndarray, None]]: np.ndarray, Union[np.ndarray, None]]:
""" """
Used to extract the parameters for the motion primitive and other parameters from an action array which might Used to extract the parameters for the motion primitive and other parameters from an action array which might

View File

@@ -13,6 +13,7 @@ from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
from .mujoco.reacher.reacher import ReacherEnv from .mujoco.reacher.reacher import ReacherEnv
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
@@ -192,7 +193,7 @@ register(
register( register(
id='BeerPong-v0', id='BeerPong-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnv', entry_point='alr_envs.envs.mujoco:BeerPongEnv',
max_episode_steps=300, max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
) )
# Here we use the same reward as in BeerPong-v0, but now consider after the release, # Here we use the same reward as in BeerPong-v0, but now consider after the release,
@@ -200,14 +201,14 @@ register(
register( register(
id='BeerPongStepBased-v0', id='BeerPongStepBased-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward', entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
max_episode_steps=300, max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
) )
# Beerpong with episodic reward, but fixed release time step # Beerpong with episodic reward, but fixed release time step
register( register(
id='BeerPongFixedRelease-v0', id='BeerPongFixedRelease-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep', entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
max_episode_steps=300, max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
) )
# Motion Primitive Environments # Motion Primitive Environments

View File

@@ -6,6 +6,8 @@ import numpy as np
from gym import utils from gym import utils
from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco import MujocoEnv
MAX_EPISODE_STEPS_BEERPONG = 300
# XML Variables # XML Variables
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom", ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom", "wrist_pitch_link_convex_decomposition_p1_geom",
@@ -28,7 +30,7 @@ CUP_COLLISION_OBJ = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "c
class BeerPongEnv(MujocoEnv, utils.EzPickle): class BeerPongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=2): def __init__(self):
self._steps = 0 self._steps = 0
# Small Context -> Easier. Todo: Should we do different versions? # Small Context -> Easier. Todo: Should we do different versions?
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -45,9 +47,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
self._start_vel = np.zeros(7) self._start_vel = np.zeros(7)
self.release_step = 100 # time step of ball release self.release_step = 100 # time step of ball release
self.ep_length = 600 // frame_skip
self.repeat_action = frame_skip self.repeat_action = 2
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
self.model = None self.model = None
self.site_id = lambda x: self.model.site_name2id(x) self.site_id = lambda x: self.model.site_name2id(x)
@@ -127,8 +128,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
if not crash: if not crash:
reward, reward_infos = self._get_reward(applied_action) reward, reward_infos = self._get_reward(applied_action)
is_collided = reward_infos['is_collided'] is_collided = reward_infos['is_collided'] # TODO: Remove if self collision does not make a difference
done = is_collided or self._steps == self.ep_length - 1 done = is_collided
self._steps += 1 self._steps += 1
else: else:
reward = -30 reward = -30
@@ -182,7 +183,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
# Is this needed? # Is this needed?
# self._is_collided = self._check_collision_with_itself([self.geom_id(name) for name in CUP_COLLISION_OBJ]) # self._is_collided = self._check_collision_with_itself([self.geom_id(name) for name in CUP_COLLISION_OBJ])
if self._steps == self.ep_length - 1: # or self._is_collided: if self._steps == MAX_EPISODE_STEPS_BEERPONG-1: # or self._is_collided:
min_dist = np.min(self.dists) min_dist = np.min(self.dists)
final_dist = self.dists_final[-1] final_dist = self.dists_final[-1]
if self.ball_ground_contact_first: if self.ball_ground_contact_first:
@@ -251,14 +252,14 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
class BeerPongEnvFixedReleaseStep(BeerPongEnv): class BeerPongEnvFixedReleaseStep(BeerPongEnv):
def __init__(self, frame_skip=2): def __init__(self):
super().__init__(frame_skip) super().__init__()
self.release_step = 62 # empirically evaluated for frame_skip=2! self.release_step = 62 # empirically evaluated for frame_skip=2!
class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv): class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
def __init__(self, frame_skip=2): def __init__(self):
super().__init__(frame_skip) super().__init__()
self.release_step = 62 # empirically evaluated for frame_skip=2! self.release_step = 62 # empirically evaluated for frame_skip=2!
def step(self, a): def step(self, a):
@@ -312,7 +313,7 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
if __name__ == "__main__": if __name__ == "__main__":
env = BeerPongEnv(frame_skip=2) env = BeerPongEnv()
env.seed(0) env.seed(0)
# env = ALRBeerBongEnvStepBased(frame_skip=2) # env = ALRBeerBongEnvStepBased(frame_skip=2)
# env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2) # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2)

View File

@@ -28,7 +28,7 @@ class MPWrapper(RawInterfaceWrapper):
return self.env.data.qvel[0:7].copy() return self.env.data.qvel[0:7].copy()
# TODO: Fix this # TODO: Fix this
def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]: def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if mp.learn_tau: if mp.learn_tau:
self.env.env.release_step = action[0] / self.env.dt # Tau value self.env.env.release_step = action[0] / self.env.dt # Tau value
return action, None return action, None

View File

@@ -38,7 +38,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
super().__init__(env) super().__init__(env)
assert isinstance(env.observation_space, Box) assert isinstance(env.observation_space, Box)
low = np.append(self.observation_space.low, 0.0) low = np.append(self.observation_space.low, 0.0)
high = np.append(self.observation_space.high, np.inf) high = np.append(self.observation_space.high, 1.0)
self.observation_space = Box(low, high, dtype=self.observation_space.dtype) self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
self.t = 0 self.t = 0
@@ -51,7 +51,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
Returns: Returns:
The observation with the time step appended to The observation with the time step appended to
""" """
return np.append(observation, self.t) return np.append(observation, self.t/self.env.spec.max_episode_steps)
def step(self, action): def step(self, action):
"""Steps through the environment, incrementing the time step. """Steps through the environment, incrementing the time step.