From b1581634e052122c9d7defa29e763223aec30a37 Mon Sep 17 00:00:00 2001
From: Hongyi Zhou
Date: Wed, 9 Nov 2022 21:23:35 +0100
Subject: [PATCH] enable max planning times flag

---
 fancy_gym/black_box/black_box_wrapper.py           | 9 +++++----
 fancy_gym/examples/example_replanning.py           | 0
 fancy_gym/examples/examples_movement_primitives.py | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)
 create mode 100644 fancy_gym/examples/example_replanning.py

diff --git a/fancy_gym/black_box/black_box_wrapper.py b/fancy_gym/black_box/black_box_wrapper.py
index b99d138..d428184 100644
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@@ -219,11 +219,12 @@ class BlackBoxWrapper(gym.ObservationWrapper):
             if self.render_kwargs:
                 self.env.render(**self.render_kwargs)

-            if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
-                                                t + 1 + self.current_traj_steps):
+            if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
+                                                 t + 1 + self.current_traj_steps)
+                        and self.max_planning_times is not None and self.plan_counts < self.max_planning_times):

-                # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
-                #     continue
+                # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
+                #     continue

                 self.condition_pos = pos if self.desired_conditioning else self.current_pos
                 self.condition_vel = vel if self.desired_conditioning else self.current_vel
diff --git a/fancy_gym/examples/example_replanning.py b/fancy_gym/examples/example_replanning.py
new file mode 100644
index 0000000..e69de29
diff --git a/fancy_gym/examples/examples_movement_primitives.py b/fancy_gym/examples/examples_movement_primitives.py
index 8d9cecc..1cda812 100644
--- a/fancy_gym/examples/examples_movement_primitives.py
+++ b/fancy_gym/examples/examples_movement_primitives.py
@@ -46,6 +46,7 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
         # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
         # to the return of a trajectory. Default is the sum over the step-wise rewards.
         obs, reward, done, info = env.step(ac)
+        print(f'steps: {info["num_steps"][-1]}')

         # Aggregated returns
         returns += reward
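
Note (not part of the patch): below is a minimal, self-contained sketch of the gating logic the first hunk introduces, assuming plan_counts is incremented each time a replanning step is actually taken. The helper should_replan and the toy schedule are hypothetical, for illustration only; they are not fancy_gym APIs.

# sketch of the replanning gate added to black_box_wrapper.py
def should_replan(done, schedule_triggered, plan_counts, max_planning_times):
    # Replan when the episode ended, or when the schedule fires and the
    # number of plans executed so far is still below max_planning_times.
    return done or (schedule_triggered
                    and max_planning_times is not None
                    and plan_counts < max_planning_times)

if __name__ == "__main__":
    plan_counts = 0
    for t in range(10):
        triggered = (t % 2 == 0)  # toy schedule: fires every other step
        if should_replan(False, triggered, plan_counts, max_planning_times=3):
            plan_counts += 1
            print(f"t={t}: replan #{plan_counts}")  # stops after 3 replans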