enable max planning times flag

This commit is contained in:
Hongyi Zhou 2022-11-09 21:23:35 +01:00
parent 99a514026f
commit b1581634e0
3 changed files with 6 additions and 4 deletions

View File

@@ -219,8 +219,9 @@ class BlackBoxWrapper(gym.ObservationWrapper):
if self.render_kwargs: if self.render_kwargs:
self.env.render(**self.render_kwargs) self.env.render(**self.render_kwargs)
if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action, if done or (self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
t + 1 + self.current_traj_steps): t + 1 + self.current_traj_steps)
and self.max_planning_times is not None and self.plan_counts < self.max_planning_times):
# if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times: # if self.max_planning_times is not None and self.plan_counts >= self.max_planning_times:
# continue # continue

View File

View File

@@ -46,6 +46,7 @@ def example_mp(env_name="HoleReacherProMP-v0", seed=1, iterations=1, render=True
# full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal # full observation space of the last step, if replanning/sub-trajectory learning is used. The 'reward' is equal
# to the return of a trajectory. Default is the sum over the step-wise rewards. # to the return of a trajectory. Default is the sum over the step-wise rewards.
obs, reward, done, info = env.step(ac) obs, reward, done, info = env.step(ac)
print(f'steps: {info["num_steps"][-1]}')
# Aggregated returns # Aggregated returns
returns += reward returns += reward