diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index f7cfb4e..fb7328c 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -75,19 +75,24 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         clipped_params = np.clip(action, self.traj_gen_action_space.low, self.traj_gen_action_space.high)
         self.traj_gen.set_params(clipped_params)
         # TODO: is this correct for replanning? Do we need to adjust anything here?
-        self.traj_gen.set_boundary_conditions(
-            bc_time=np.array(0) if not self.do_replanning else np.array([self.current_traj_steps * self.dt]),
-            bc_pos=self.current_pos, bc_vel=self.current_vel)
-        # TODO remove the - self.dt after Bruces fix.
+        bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
+        self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
+        # TODO: remove the - self.dt after Bruce's fix.
         self.traj_gen.set_duration(None if self.learn_sub_trajectories else self.duration - self.dt, self.dt)
-        traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
-        trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
+        # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
+        trajectory = get_numpy(self.traj_gen.get_traj_pos())
+        velocity = get_numpy(self.traj_gen.get_traj_vel())
 
-        return get_numpy(trajectory_tensor), get_numpy(velocity_tensor)
+        if self.do_replanning:
+            # Remove the first part of the trajectory, as those steps were already executed
+            trajectory = trajectory[self.current_traj_steps:]
+            velocity = velocity[self.current_traj_steps:]
+
+        return trajectory, velocity
 
     def _get_traj_gen_action_space(self):
         """This function can be used to set up an individual space for the parameters of the traj_gen."""
-        min_action_bounds, max_action_bounds = self.traj_gen.get_params_bounds().t()
+        min_action_bounds, max_action_bounds = self.traj_gen.get_params_bounds()
         action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
                                       dtype=self.env.action_space.dtype)
         return action_space
@@ -105,13 +110,13 @@ class BlackBoxWrapper(gym.ObservationWrapper):
         return self._get_traj_gen_action_space()
 
     def _get_observation_space(self):
-        mask = self.env.context_mask
-        if not self.return_context_observation:
+        if self.return_context_observation:
+            mask = self.env.context_mask
             # return full observation
-            mask = np.ones_like(mask, dtype=bool)
-        min_obs_bound = self.env.observation_space.low[mask]
-        max_obs_bound = self.env.observation_space.high[mask]
-        return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
+            min_obs_bound = self.env.observation_space.low[mask]
+            max_obs_bound = self.env.observation_space.high[mask]
+            return spaces.Box(low=min_obs_bound, high=max_obs_bound, dtype=self.env.observation_space.dtype)
+        return self.env.observation_space
 
     def step(self, action: np.ndarray):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
@@ -152,18 +157,18 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                                                t + 1 + self.current_traj_steps):
                    break
 
-        infos.update({k: v[:t + 1] for k, v in infos.items()})
+        infos.update({k: v[:t] for k, v in infos.items()})
 
         self.current_traj_steps += t + 1
 
         if self.verbose >= 2:
             infos['positions'] = trajectory
             infos['velocities'] = velocity
-            infos['step_actions'] = actions[:t]
+            infos['step_actions'] = actions[:t + 1]
             infos['step_observations'] = observations[:t + 1]
-            infos['step_rewards'] = rewards[:t]
+            infos['step_rewards'] = rewards[:t + 1]
             infos['trajectory_length'] = t + 1
 
-        trajectory_return = self.reward_aggregation(rewards[:t])
+        trajectory_return = self.reward_aggregation(rewards[:t + 1])
 
         return self.observation(obs), trajectory_return, done, infos
 
    def render(self, **kwargs):
diff --git a/fancy_gym/utils/time_aware_observation.py b/fancy_gym/utils/time_aware_observation.py
index 4fa0d74..b2cbc78 100644
--- a/fancy_gym/utils/time_aware_observation.py
+++ b/fancy_gym/utils/time_aware_observation.py
@@ -40,9 +40,10 @@ class TimeAwareObservation(gym.ObservationWrapper):
         high = np.append(self.observation_space.high, 1.0)
         self.observation_space = Box(low, high, dtype=self.observation_space.dtype)
         self.t = 0
+        self._max_episode_steps = env.spec.max_episode_steps
 
     def observation(self, observation):
-        """Adds to the observation with the current time step.
+        """Appends the current time step, normalized by the maximum episode steps, to the observation.
 
         Args:
             observation: The observation to add the time step to
@@ -50,7 +51,7 @@ class TimeAwareObservation(gym.ObservationWrapper):
         Returns:
             The observation with the time step appended to
         """
-        return np.append(observation, self.t/self.env.spec.max_episode_steps)
+        return np.append(observation, self.t / self._max_episode_steps)
 
     def step(self, action):
         """Steps through the environment, incrementing the time step.
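Reviewer note (illustration, not part of the patch): the BlackBoxWrapper changes settle on two
slicing conventions. After the rollout loop exits at step index t, the inclusive slice
rewards[:t + 1] keeps the reward of the final executed step, consistent with
trajectory_length = t + 1; in get_trajectory, the replanning branch instead drops the
current_traj_steps entries already executed, since the generator returns the full trajectory
from time 0 (per the in-diff comment). A minimal standalone sketch with made-up values:

import numpy as np

t = 4                        # step index at which the rollout loop exited
rewards = np.arange(10.0)    # one reward per executed environment step

# The old aggregation used rewards[:t] and silently dropped the final reward.
assert rewards[:t].sum() == 0 + 1 + 2 + 3
# The patched code uses the inclusive slice, matching trajectory_length = t + 1.
assert rewards[:t + 1].sum() == 0 + 1 + 2 + 3 + 4

# Replanning trim: cut the already-executed prefix off the regenerated trajectory.
current_traj_steps = 3
trajectory = np.arange(10.0).reshape(-1, 1)    # (n_steps, n_dof) positions
remaining = trajectory[current_traj_steps:]
assert remaining.shape[0] == trajectory.shape[0] - current_traj_steps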
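Reviewer note (illustration, not part of the patch): caching env.spec.max_episode_steps in
TimeAwareObservation.__init__ resolves env.spec once instead of on every observation call, and
fails fast at construction time if the wrapped env has no spec. A usage sketch with the classic
gym API this codebase targets, assuming the wrapper resets self.t to 0 in reset() as gym's own
TimeAwareObservation does (that method is not shown in this hunk):

import gym
from fancy_gym.utils.time_aware_observation import TimeAwareObservation

base = gym.make("CartPole-v1")    # any registered env with a finite max_episode_steps
env = TimeAwareObservation(base)

obs = env.reset()
# One extra feature is appended: elapsed time, normalized to [0, 1].
assert obs.shape[0] == base.observation_space.shape[0] + 1
assert obs[-1] == 0.0             # t = 0 right after reset

obs, reward, done, info = env.step(env.action_space.sample())
assert obs[-1] == 1 / env._max_episode_steps    # t = 1 after one step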