use desired point as boundary condition

2022-10-25 22:15:30 +02:00 · 2022-10-25 22:15:30 +02:00 · a1d96e6016
commit a1d96e6016
parent 556bfd0b35
3 changed files with 61 additions and 5 deletions
--- a/fancy_gym/black_box/black_box_wrapper.py
+++ b/fancy_gym/black_box/black_box_wrapper.py
@ -21,7 +21,8 @@ class BlackBoxWrapper(gym.ObservationWrapper):
                 learn_sub_trajectories: bool = False,
                 replanning_schedule: Optional[
                     Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int], bool]] = None,
-                 reward_aggregation: Callable[[np.ndarray], float] = np.sum
+                 reward_aggregation: Callable[[np.ndarray], float] = np.sum,
                 desired_conditioning: bool = False
                 ):
        """
        gym.Wrapper for leveraging a black box approach with a trajectory generator.
@ -67,6 +68,11 @@ class BlackBoxWrapper(gym.ObservationWrapper):
        self.render_kwargs = {}
        self.verbose = verbose
        # condition value
        self.desired_conditioning = True
        self.condition_pos = None
        self.condition_vel = None
    def observation(self, observation):
        # return context space if we are
        if self.return_context_observation:
@ -87,7 +93,11 @@ class BlackBoxWrapper(gym.ObservationWrapper):
        bc_time = np.array(0 if not self.do_replanning else self.current_traj_steps * self.dt)
        # TODO we could think about initializing with the previous desired value in order to have a smooth transition
        #  at least from the planning point of view.
-        self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
+        # self.traj_gen.set_boundary_conditions(bc_time, self.current_pos, self.current_vel)
        if self.current_traj_steps == 0:
            self.condition_pos = self.current_pos
            self.condition_vel = self.current_vel
        self.traj_gen.set_boundary_conditions(bc_time, self.condition_pos, self.condition_vel)
        self.traj_gen.set_duration(duration, self.dt)
        # traj_dict = self.traj_gen.get_trajs(get_pos=True, get_vel=True)
        position = get_numpy(self.traj_gen.get_traj_pos())
@ -165,14 +175,22 @@ class BlackBoxWrapper(gym.ObservationWrapper):
            if done or self.replanning_schedule(self.current_pos, self.current_vel, obs, c_action,
                                                t + 1 + self.current_traj_steps):
                if self.desired_conditioning:
                    self.condition_pos = pos
                    self.condition_vel = vel
                else:
                    self.condition_pos = self.current_pos
                    self.condition_vel = self.current_vel
                break
        infos.update({k: v[:t+1] for k, v in infos.items()})
        self.current_traj_steps += t + 1
        if self.verbose >= 2:
-            infos['positions'] = position
+            infos['desired_pos'] = position[:t+1]
-            infos['velocities'] = velocity
+            infos['desired_vel'] = velocity[:t+1]
            infos['current_pos'] = self.current_pos
            infos['current_vel'] = self.current_vel
            infos['step_actions'] = actions[:t + 1]
            infos['step_observations'] = observations[:t + 1]
            infos['step_rewards'] = rewards[:t + 1]
--- a/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
+++ b/fancy_gym/envs/mujoco/box_pushing/box_pushing_env.py
@ -360,7 +360,7 @@ class BoxPushingTemporalSpatialSparse(BoxPushingEnvBase):
 if __name__=="__main__":
    env = BoxPushingTemporalSpatialSparse(frame_skip=10)
    env.reset()
-    for i in range(1):
+    for i in range(10):
        env.reset()
        for _ in range(100):
            env.render("human")
--- a/fancy_gym/examples/example_replanning_envs.py
+++ b/fancy_gym/examples/example_replanning_envs.py
@ -0,0 +1,38 @@
 import fancy_gym
 import numpy as np
 import matplotlib.pyplot as plt
 def plot_trajectory(traj, dof=3):
    """Plot one column of a trajectory against its row index and show the figure.

    Args:
        traj: 2D array indexed as ``traj[row, column]``.
        dof: index of the column to plot. The default of 3 is the column that
            was previously hard-coded.
    """
    plt.figure()
    # The line needs a label: calling ``plt.legend()`` without any labelled
    # artist produces an empty legend and a matplotlib warning.
    plt.plot(traj[:, dof], label=f"dof {dof}")
    plt.legend()
    plt.show()
 def run_replanning_envs(env_name="BoxPushingProDMP-v0", seed=1, iterations=1, render=True):
    """Roll out random actions in a replanning env and plot the desired positions.

    Each call to ``env.step`` returns the desired positions of the segment that
    was just executed in ``info['desired_pos']``; the segments of one episode
    are stacked and handed to ``plot_trajectory``.

    Args:
        env_name: environment id passed to ``fancy_gym.make``.
        seed: seed passed to ``fancy_gym.make``.
        iterations: number of episodes to run; one figure is shown per episode.
        render: when true, ``env.render(mode="human")`` is called after every step.

    Raises:
        KeyError: if the ``info`` dict of a step has no ``'desired_pos'`` entry.
    """
    env = fancy_gym.make(env_name, seed=seed)
    for _ in range(iterations):
        env.reset()
        # Segments are collected in a list instead of being written into a
        # fixed (100, 7) buffer with a stride of 25, so segments of any length
        # and episodes of any total length are handled.
        segments = []
        done = False
        # Truthiness instead of ``done is False``: the identity test is False
        # for a ``numpy.bool_`` flag, which would skip the rollout entirely.
        while not done:
            action = env.action_space.sample()
            _, _, done, info = env.step(action)
            if 'desired_pos' not in info:
                # NOTE(review): the wrapper appears to add this key only when
                # its ``verbose`` level is >= 2 -- confirm the env is built so.
                raise KeyError(
                    "step info has no 'desired_pos' entry; "
                    "the environment must report desired trajectories")
            # presumably shaped (steps, dof), as plot_trajectory indexes
            # traj[:, column] -- TODO confirm
            segments.append(np.asarray(info['desired_pos']))
            if render:
                env.render(mode="human")
        plot_trajectory(np.concatenate(segments, axis=0))
    env.close()
 if __name__ == "__main__":
    # Script entry point: run a single, non-rendered episode of the dense
    # box-pushing ProDMP environment.
    run_replanning_envs(
        env_name="BoxPushingDenseProDMP-v0",
        seed=1,
        iterations=1,
        render=False,
    )