enable max planning times flag

2022-11-09 21:23:35 +01:00 · 2022-11-09 21:23:35 +01:00 · b1581634e0
commit b1581634e0
parent 99a514026f
3 changed files with 6 additions and 4 deletions
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -219,8 +219,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                if self.render_kwargs:
                    self.env.render(**self.render_kwargs)

-                if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
-                                                    t + 1 + self.current_traj_steps):
+                if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
+                                                        t + 1 + self.current_traj_steps)
+                        and self.max_planning_times is not None and self.plan_counts < self.max_planning_times):

                        # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
                        #     continue
--- a/fancy_gym/examples/example_replanning.py
+++ b/fancy_gym/examples/example_replanning.py
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -46,6 +46,7 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
        # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
        # to the return of a trajectory. Default is the sum over the step-wise rewards.
        obs, reward, done, info = env.step(ac)
+        print(f'steps: {info["num_steps"][-1]}')
        # Aggregated returns
        returns += reward