minor bug fixes
This commit is contained in:
parent
ce795669a9
commit
4aacd71ed3
@ -75,19 +75,24 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
|
clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
|
||||||
self.traj_gen.set_params(clipped_params)
|
self.traj_gen.set_params(clipped_params)
|
||||||
# TODO: is this correct for replanning? Do we need to adjust anything here?
|
# TODO: is this correct for replanning? Do we need to adjust anything here?
|
||||||
self.traj_gen.set_boundary_conditions(
|
bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
|
||||||
bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
|
self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
|
||||||
bc_pos=self.current_pos, bc_vel=self.current_vel)
|
# TODO: remove the "- self.dt" after Bruce's fix.
|
||||||
# TODO remove the - self.dt after Bruces fix.
|
|
||||||
self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration - self.dt, self.dt)
|
self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration - self.dt, self.dt)
|
||||||
traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
|
# traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
|
||||||
trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
|
trajectory = get_numpy(self.traj_gen.get_traj_pos())
|
||||||
|
velocity = get_numpy(self.traj_gen.get_traj_vel())
|
||||||
|
|
||||||
return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
|
if self.do_replanning:
|
||||||
|
# Remove first part of trajectory as this is already over
|
||||||
|
trajectory = trajectory[self.current_traj_steps:]
|
||||||
|
velocity = velocity[self.current_traj_steps:]
|
||||||
|
|
||||||
|
return trajectory, velocity
|
||||||
|
|
||||||
def _get_traj_gen_action_space(self):
|
def _get_traj_gen_action_space(self):
|
||||||
"""This function can be used to set up an individual space for the parameters of the traj_gen."""
|
"""This function can be used to set up an individual space for the parameters of the traj_gen."""
|
||||||
min_action_bounds, max_action_bounds = self.traj_gen.get_params_bounds().t()
|
min_action_bounds, max_action_bounds = self.traj_gen.get_params_bounds()
|
||||||
action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
|
||||||
dtype=self.env.action_space.dtype)
|
dtype=self.env.action_space.dtype)
|
||||||
return action_space
|
return action_space
|
||||||
@ -105,13 +110,13 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
return self._get_traj_gen_action_space()
|
return self._get_traj_gen_action_space()
|
||||||
|
|
||||||
def _get_observation_space(self):
|
def _get_observation_space(self):
|
||||||
|
if self.return_context_observation:
|
||||||
mask = self.env.context_mask
|
mask = self.env.context_mask
|
||||||
if not self.return_context_observation:
|
|
||||||
# return full observation
|
# restrict the observation bounds to the entries selected by the context mask
|
||||||
mask = np.ones_like(mask, dtype=bool)
|
|
||||||
min_obs_bound = self.env.observation_space.low[mask]
|
min_obs_bound = self.env.observation_space.low[mask]
|
||||||
max_obs_bound = self.env.observation_space.high[mask]
|
max_obs_bound = self.env.observation_space.high[mask]
|
||||||
return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
|
return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
|
||||||
|
return self.env.observation_space
|
||||||
|
|
||||||
def step(self, action: np.ndarray):
|
def step(self, action: np.ndarray):
|
||||||
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
|
""" This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
|
||||||
@ -152,18 +157,18 @@ class BlackBoxWrapper(gym.ObservationWrapper):
|
|||||||
t + 1 + self.current_traj_steps):
|
t + 1 + self.current_traj_steps):
|
||||||
break
|
break
|
||||||
|
|
||||||
infos.update({k: v[:t + 1] for k, v in infos.items()})
|
infos.update({k: v[:t] for k, v in infos.items()})
|
||||||
self.current_traj_steps += t + 1
|
self.current_traj_steps += t + 1
|
||||||
|
|
||||||
if self.verbose >= 2:
|
if self.verbose >= 2:
|
||||||
infos['positions'] = trajectory
|
infos['positions'] = trajectory
|
||||||
infos['velocities'] = velocity
|
infos['velocities'] = velocity
|
||||||
infos['step_actions'] = actions[:t]
|
infos['step_actions'] = actions[:t + 1]
|
||||||
infos['step_observations'] = observations[:t + 1]
|
infos['step_observations'] = observations[:t + 1]
|
||||||
infos['step_rewards'] = rewards[:t]
|
infos['step_rewards'] = rewards[:t + 1]
|
||||||
|
|
||||||
infos['trajectory_length'] = t + 1
|
infos['trajectory_length'] = t + 1
|
||||||
trajectory_return = self.reward_aggregation(rewards[:t])
|
trajectory_return = self.reward_aggregation(rewards[:t + 1])
|
||||||
return self.observation(obs), trajectory_return, done, infos
|
return self.observation(obs), trajectory_return, done, infos
|
||||||
|
|
||||||
def render(self, **kwargs):
|
def render(self, **kwargs):
|
||||||
|
@ -40,9 +40,10 @@ class TimeAwareObservation(gym.ObservationWrapper):
|
|||||||
high = np.append(self.observation_space.high, 1.0)
|
high = np.append(self.observation_space.high, 1.0)
|
||||||
self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
|
self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
|
||||||
self.t = 0
|
self.t = 0
|
||||||
|
self._max_episode_steps = env.spec.max_episode_steps
|
||||||
|
|
||||||
def observation(self, observation):
|
def observation(self, observation):
|
||||||
"""Adds to the observation with the current time step.
|
"""Adds to the observation with the current time step normalized with max steps.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
observation: The observation to add the time step to
|
observation: The observation to add the time step to
|
||||||
@ -50,7 +51,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
|
|||||||
Returns:
|
Returns:
|
||||||
The observation with the time step appended to
|
The observation with the normalized time step appended
|
||||||
"""
|
"""
|
||||||
return np.append(observation, self.t/self.env.spec.max_episode_steps)
|
return np.append(observation, self.t / self._max_episode_steps)
|
||||||
|
|
||||||
def step(self, action):
|
def step(self, action):
|
||||||
"""Steps through the environment, incrementing the time step.
|
"""Steps through the environment, incrementing the time step.
|
||||||
|
Loading…
Reference in New Issue
Block a user