diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index b99d138..d428184 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -219,11 +219,12 @@ class BlackBoxWrapper(gym.ObservationWrapper):
             if self.render_kwargs:
                 self.env.render(**self.render_kwargs)
 
-            if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
-                                                t + 1 + self.current_traj_steps):
+            if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
+                                                 t + 1 + self.current_traj_steps)
+                        and (self.max_planning_times is None or self.plan_counts < self.max_planning_times)):
 
-                # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
-                #     continue
+                # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
+                #     continue
 
                 self.condition_pos = pos if self.desired_conditioning else self.current_pos
                 self.condition_vel = vel if self.desired_conditioning else self.current_vel
diff --git a/fancy_gym/examples/example_replanning.py b/fancy_gym/examples/example_replanning.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 8d9cecc..1cda812 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -46,6 +46,7 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
         # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
         # to the return of a trajectory. Default is the sum over the step-wise rewards.
         obs, reward, done, info = env.step(ac)
+        print(f'steps: {info["num_steps"][-1]}')
 
         # Aggregated returns
         returns += reward
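Note: fancy_gym/examples/example_replanning.py is added as an empty file in this patch (blob e69de29). A minimal sketch of what such an example might eventually contain, assuming the four-tuple step API used by the other examples and a replanning-enabled environment id such as 'BoxPushingDenseReplanProDMP-v0'; both the id and the function name are placeholders, not part of this patch:

# Hypothetical sketch: example_replanning.py is empty in this diff.
# The environment id below is an assumption, not taken from the patch.
import fancy_gym


def example_run_replanning_env(env_name='BoxPushingDenseReplanProDMP-v0', seed=1, iterations=1, render=False):
    # With BlackBoxWrapper, each env.step(ac) executes a whole (sub-)trajectory,
    # so an episode consists of several steps only when a replanning schedule fires.
    env = fancy_gym.make(env_name, seed=seed)
    env.reset()
    for _ in range(iterations):
        done = False
        while not done:
            ac = env.action_space.sample()
            obs, reward, done, info = env.step(ac)
            if render:
                env.render(mode='human')
        env.reset()


if __name__ == '__main__':
    example_run_replanning_env()

Under the reworked condition in black_box_wrapper.py above, such a run replans whenever the schedule fires, but at most max_planning_times times per episode when that limit is set.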