remove minor attributes from BP

This commit is contained in:
Onur 2022-07-13 09:52:51 +02:00
parent 14ee580473
commit ce00996782
6 changed files with 21 additions and 24 deletions

View File

@@ -122,7 +122,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
# TODO remove this part, right now only needed for beer pong
mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen)
mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
trajectory, velocity = self.get_trajectory(mp_params)
trajectory_length = len(trajectory)

View File

@@ -52,12 +52,7 @@ class RawInterfaceWrapper(gym.Wrapper):
"""
return self.env.dt
def do_replanning(self, pos, vel, s, a, t):
# return t % 100 == 0
# return bool(self.replanning_model(s))
return False
def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
np.ndarray, Union[np.ndarray, None]]:
"""
Used to extract the parameters for the motion primitive and other parameters from an action array which might

View File

@@ -13,6 +13,7 @@ from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
from .mujoco.reacher.reacher import ReacherEnv
from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP
@@ -192,7 +193,7 @@ register(
register(
id='BeerPong-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnv',
max_episode_steps=300,
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
# Here we use the same reward as in BeerPong-v0, but now consider after the release,
@@ -200,14 +201,14 @@ register(
register(
id='BeerPongStepBased-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
max_episode_steps=300,
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
# Beerpong with episodic reward, but fixed release time step
register(
id='BeerPongFixedRelease-v0',
entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
max_episode_steps=300,
max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
)
# Motion Primitive Environments

View File

@@ -6,6 +6,8 @@ import numpy as np
from gym import utils
from gym.envs.mujoco import MujocoEnv
MAX_EPISODE_STEPS_BEERPONG = 300
# XML Variables
ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom",
@@ -28,7 +30,7 @@ CUP_COLLISION_OBJ = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "c
class BeerPongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=2):
def __init__(self):
self._steps = 0
# Small Context -> Easier. Todo: Should we do different versions?
# self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -45,9 +47,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
self._start_vel = np.zeros(7)
self.release_step = 100 # time step of ball release
self.ep_length = 600 // frame_skip
self.repeat_action = frame_skip
self.repeat_action = 2
# TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
self.model = None
self.site_id = lambda x: self.model.site_name2id(x)
@@ -127,8 +128,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
if not crash:
reward, reward_infos = self._get_reward(applied_action)
is_collided = reward_infos['is_collided']
done = is_collided or self._steps == self.ep_length - 1
is_collided = reward_infos['is_collided'] # TODO: Remove if self collision does not make a difference
done = is_collided
self._steps += 1
else:
reward = -30
@@ -182,7 +183,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
# Is this needed?
# self._is_collided = self._check_collision_with_itself([self.geom_id(name) for name in CUP_COLLISION_OBJ])
if self._steps == self.ep_length - 1: # or self._is_collided:
if self._steps == MAX_EPISODE_STEPS_BEERPONG-1: # or self._is_collided:
min_dist = np.min(self.dists)
final_dist = self.dists_final[-1]
if self.ball_ground_contact_first:
@@ -251,14 +252,14 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
class BeerPongEnvFixedReleaseStep(BeerPongEnv):
def __init__(self, frame_skip=2):
super().__init__(frame_skip)
def __init__(self):
super().__init__()
self.release_step = 62 # empirically evaluated for frame_skip=2!
class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
def __init__(self, frame_skip=2):
super().__init__(frame_skip)
def __init__(self):
super().__init__()
self.release_step = 62 # empirically evaluated for frame_skip=2!
def step(self, a):
@@ -312,7 +313,7 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
if __name__ == "__main__":
env = BeerPongEnv(frame_skip=2)
env = BeerPongEnv()
env.seed(0)
# env = ALRBeerBongEnvStepBased(frame_skip=2)
# env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2)

View File

@@ -28,7 +28,7 @@ class MPWrapper(RawInterfaceWrapper):
return self.env.data.qvel[0:7].copy()
# TODO: Fix this
def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if mp.learn_tau:
self.env.env.release_step = action[0] / self.env.dt # Tau value
return action, None

View File

@@ -38,7 +38,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
super().__init__(env)
assert isinstance(env.observation_space, Box)
low = np.append(self.observation_space.low, 0.0)
high = np.append(self.observation_space.high, np.inf)
high = np.append(self.observation_space.high, 1.0)
self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
self.t = 0
@@ -51,7 +51,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
Returns:
The observation with the time step appended to
"""
return np.append(observation, self.t)
return np.append(observation, self.t/self.env.spec.max_episode_steps)
def step(self, action):
"""Steps through the environment, incrementing the time step.