remove minor attributes from BP

commit ce00996782 (parent 14ee580473)
@@ -122,7 +122,7 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""

         # TODO remove this part, right now only needed for beer pong
-        mp_params, env_spec_params = self.env._episode_callback(action, self.traj_gen)
+        mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
         trajectory, velocity = self.get_trajectory(mp_params)

         trajectory_length = len(trajectory)
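For context, this call sits at the top of the wrapper's trajectory rollout: the callback lets the wrapped env adjust or split the action before the MP generates a trajectory, which is then tracked step by step. A minimal sketch of that loop; everything except episode_callback and get_trajectory (controller, current_pos/current_vel accessors) is illustrative, not taken from the repo:

def step(self, action):
    # Env-specific hook: may modify MP parameters or consume part of the action.
    mp_params, env_spec_params = self.env.episode_callback(action, self.traj_gen)
    trajectory, velocity = self.get_trajectory(mp_params)

    obs, total_reward, done, info = None, 0.0, False, {}
    for pos, vel in zip(trajectory, velocity):
        # Hypothetical tracking controller turning (pos, vel) into a raw action.
        raw_action = self.tracking_controller.get_action(pos, vel, self.env.current_pos, self.env.current_vel)
        obs, reward, done, info = self.env.step(raw_action)
        total_reward += reward
        if done:
            break
    return obs, total_reward, done, info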
@@ -52,12 +52,7 @@ class RawInterfaceWrapper(gym.Wrapper):
         """
         return self.env.dt

-    def do_replanning(self, pos, vel, s, a, t):
-        # return t % 100 == 0
-        # return bool(self.replanning_model(s))
-        return False
-
-    def _episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
+    def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[
             np.ndarray, Union[np.ndarray, None]]:
         """
         Used to extract the parameters for the motion primitive and other parameters from an action array which might
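The leading underscore is dropped because subclasses are expected to override this hook; it is part of the public RawInterfaceWrapper interface rather than an internal helper. A hypothetical override illustrating the return contract (the split below is purely illustrative, not from the repo):

def episode_callback(self, action: np.ndarray, traj_gen: MPInterface) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
    # Illustrative split: the last entry is an env-specific parameter,
    # the rest parametrize the motion primitive.
    return action[:-1], action[-1:]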
@@ -13,6 +13,7 @@ from .mujoco.hopper_jump.hopper_jump import MAX_EPISODE_STEPS_HOPPERJUMP
 from .mujoco.hopper_jump.hopper_jump_on_box import MAX_EPISODE_STEPS_HOPPERJUMPONBOX
 from .mujoco.hopper_throw.hopper_throw import MAX_EPISODE_STEPS_HOPPERTHROW
 from .mujoco.hopper_throw.hopper_throw_in_basket import MAX_EPISODE_STEPS_HOPPERTHROWINBASKET
+from .mujoco.beerpong.beerpong import MAX_EPISODE_STEPS_BEERPONG
 from .mujoco.reacher.reacher import ReacherEnv
 from .mujoco.walker_2d_jump.walker_2d_jump import MAX_EPISODE_STEPS_WALKERJUMP

@@ -192,7 +193,7 @@ register(
 register(
     id='BeerPong-v0',
     entry_point='alr_envs.envs.mujoco:BeerPongEnv',
-    max_episode_steps=300,
+    max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )

 # Here we use the same reward as in BeerPong-v0, but now consider after the release,
@@ -200,14 +201,14 @@ register(
 register(
     id='BeerPongStepBased-v0',
     entry_point='alr_envs.envs.mujoco:BeerPongEnvStepBasedEpisodicReward',
-    max_episode_steps=300,
+    max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )

 # Beerpong with episodic reward, but fixed release time step
 register(
     id='BeerPongFixedRelease-v0',
     entry_point='alr_envs.envs.mujoco:BeerPongEnvFixedReleaseStep',
-    max_episode_steps=300,
+    max_episode_steps=MAX_EPISODE_STEPS_BEERPONG,
 )

 # Motion Primitive Environments
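Replacing the literal 300 with the shared constant means gym's TimeLimit horizon can no longer drift apart from the episode length the env uses internally. A quick sanity check, assuming the package registers its environments on import, as gym env collections usually do:

import gym
import alr_envs  # assumed to run the register() calls above on import

env = gym.make('BeerPong-v0')
# TimeLimit horizon now matches the env-internal episode length.
assert env.spec.max_episode_steps == 300  # == MAX_EPISODE_STEPS_BEERPONG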
@@ -6,6 +6,8 @@ import numpy as np
 from gym import utils
 from gym.envs.mujoco import MujocoEnv

+MAX_EPISODE_STEPS_BEERPONG = 300
+
 # XML Variables
 ROBOT_COLLISION_OBJ = ["wrist_palm_link_convex_geom",
                        "wrist_pitch_link_convex_decomposition_p1_geom",
@@ -28,7 +30,7 @@ CUP_COLLISION_OBJ = ["cup_geom_table3", "cup_geom_table4", "cup_geom_table5", "c


 class BeerPongEnv(MujocoEnv, utils.EzPickle):
-    def __init__(self, frame_skip=2):
+    def __init__(self):
         self._steps = 0
         # Small Context -> Easier. Todo: Should we do different versions?
         # self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
@@ -45,9 +47,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         self._start_vel = np.zeros(7)

         self.release_step = 100  # time step of ball release
-        self.ep_length = 600 // frame_skip

-        self.repeat_action = frame_skip
+        self.repeat_action = 2
         # TODO: If accessing IDs is easier in the (new) official mujoco bindings, remove this
         self.model = None
         self.site_id = lambda x: self.model.site_name2id(x)
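With frame_skip fixed at 2, the old per-instance episode length was redundant: 600 // 2 == 300, which is exactly MAX_EPISODE_STEPS_BEERPONG, so the module-level constant can replace self.ep_length everywhere. The remaining repeat_action follows the usual action-repeat pattern; a hedged sketch of that pattern, not the repo's exact step body:

def step(self, a):
    # Apply the same action for repeat_action consecutive simulation frames.
    for _ in range(self.repeat_action):
        self.do_simulation(a, 1)  # MujocoEnv helper: advance one frame under action a
    self._steps += 1
    ...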
@@ -127,8 +128,8 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):

         if not crash:
             reward, reward_infos = self._get_reward(applied_action)
-            is_collided = reward_infos['is_collided']
-            done = is_collided or self._steps == self.ep_length - 1
+            is_collided = reward_infos['is_collided']  # TODO: Remove if self collision does not make a difference
+            done = is_collided
             self._steps += 1
         else:
             reward = -30
@@ -182,7 +183,7 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):
         # Is this needed?
         # self._is_collided = self._check_collision_with_itself([self.geom_id(name) for name in CUP_COLLISION_OBJ])

-        if self._steps == self.ep_length - 1:  # or self._is_collided:
+        if self._steps == MAX_EPISODE_STEPS_BEERPONG-1:  # or self._is_collided:
             min_dist = np.min(self.dists)
             final_dist = self.dists_final[-1]
             if self.ball_ground_contact_first:
@@ -251,14 +252,14 @@ class BeerPongEnv(MujocoEnv, utils.EzPickle):


 class BeerPongEnvFixedReleaseStep(BeerPongEnv):
-    def __init__(self, frame_skip=2):
-        super().__init__(frame_skip)
+    def __init__(self):
+        super().__init__()
         self.release_step = 62  # empirically evaluated for frame_skip=2!


 class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):
-    def __init__(self, frame_skip=2):
-        super().__init__(frame_skip)
+    def __init__(self):
+        super().__init__()
         self.release_step = 62  # empirically evaluated for frame_skip=2!

     def step(self, a):
@@ -312,7 +313,7 @@ class BeerPongEnvStepBasedEpisodicReward(BeerPongEnv):


 if __name__ == "__main__":
-    env = BeerPongEnv(frame_skip=2)
+    env = BeerPongEnv()
     env.seed(0)
     # env = ALRBeerBongEnvStepBased(frame_skip=2)
     # env = ALRBeerBongEnvStepBasedEpisodicReward(frame_skip=2)
@@ -28,7 +28,7 @@ class MPWrapper(RawInterfaceWrapper):
         return self.env.data.qvel[0:7].copy()

     # TODO: Fix this
-    def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+    def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
         if mp.learn_tau:
             self.env.env.release_step = action[0] / self.env.dt  # Tau value
         return action, None
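When the MP learns its duration (learn_tau), action[0] is a time in seconds, and dividing by the control time step converts it into the simulator step at which the ball is released. A worked example with an assumed dt of 0.02 s:

dt = 0.02                # assumed control time step of the wrapped env
tau = 1.24               # learned duration in seconds, i.e. action[0]
release_step = tau / dt  # -> 62.0, matching the empirically chosen default above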
@@ -38,7 +38,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
         super().__init__(env)
         assert isinstance(env.observation_space, Box)
         low = np.append(self.observation_space.low, 0.0)
-        high = np.append(self.observation_space.high, np.inf)
+        high = np.append(self.observation_space.high, 1.0)
         self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
         self.t = 0

@@ -51,7 +51,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
         Returns:
             The observation with the time step appended to
         """
-        return np.append(observation, self.t)
+        return np.append(observation, self.t/self.env.spec.max_episode_steps)

     def step(self, action):
         """Steps through the environment, incrementing the time step.
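Taken together, these two hunks change the appended time feature from an unbounded raw step counter to t / max_episode_steps in [0, 1], so its scale no longer depends on the episode horizon. A condensed sketch of the patched wrapper; only the observation-space bounds and the observation method are taken from the diff, the step/reset bookkeeping is assumed:

import gym
import numpy as np
from gym.spaces import Box


class TimeAwareObservation(gym.ObservationWrapper):
    """Appends the normalized time step t / T to every observation."""

    def __init__(self, env):
        super().__init__(env)
        assert isinstance(env.observation_space, Box)
        low = np.append(self.observation_space.low, 0.0)
        high = np.append(self.observation_space.high, 1.0)  # normalized time lies in [0, 1]
        self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
        self.t = 0

    def observation(self, observation):
        return np.append(observation, self.t / self.env.spec.max_episode_steps)

    def step(self, action):
        # Assumed bookkeeping: count steps so observation() sees the current t.
        self.t += 1
        return super().step(action)

    def reset(self, **kwargs):
        self.t = 0
        return super().reset(**kwargs)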