"""
Process robomimic dataset and save it into our custom format so it can be
loaded for diffusion training.

Using some code from robomimic/robomimic/scripts/get_dataset_info.py

can-mh:
    total transitions: 62756
    total trajectories: 300
    traj length mean: 209.18666666666667
    traj length std: 114.42181532479817
    traj length min: 98
    traj length max: 1050
    action min: -1.0
    action max: 1.0

    {
        "env_name": "PickPlaceCan",
        "env_version": "1.4.1",
        "type": 1,
        "env_kwargs": {
            "has_renderer": false,
            "has_offscreen_renderer": false,
            "ignore_done": true,
            "use_object_obs": true,
            "use_camera_obs": false,
            "control_freq": 20,
            "controller_configs": {
                "type": "OSC_POSE",
                "input_max": 1,
                "input_min": -1,
                "output_max": [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
                "output_min": [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
                "kp": 150,
                "damping": 1,
                "impedance_mode": "fixed",
                "kp_limits": [0, 300],
                "damping_limits": [0, 10],
                "position_limits": null,
                "orientation_limits": null,
                "uncouple_pos_ori": true,
                "control_delta": true,
                "interpolation": null,
                "ramp_ratio": 0.2
            },
            "robots": ["Panda"],
            "camera_depths": false,
            "camera_heights": 84,
            "camera_widths": 84,
            "reward_shaping": false
        }
    }

The robomimic dataset normalizes actions to [-1, 1] and observations only
roughly to [-1, 1]: the upper value is sometimes a bit larger than 1 (but
within 1.1).
"""

import argparse
import datetime
import importlib
import logging
import os
import pickle
import random
import subprocess
import sys
from copy import deepcopy

import numpy as np
from tqdm import tqdm

try:
    import h5py  # not included in pyproject.toml
except ImportError:
    # Best-effort install into the interpreter that is running this script,
    # then import again so the name is actually bound afterwards.
    print("Installing h5py")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "h5py"])
    importlib.invalidate_caches()
    import h5py


def _pad_trajectory(traj, target_length):
    """Zero-pad `traj` along its first axis up to `target_length` steps."""
    if traj.ndim not in (1, 2, 4):
        raise ValueError("Unsupported dimension")
    pad_width = [(0, target_length - traj.shape[0])] + [(0, 0)] * (traj.ndim - 1)
    return np.pad(traj, pad_width, mode="constant", constant_values=0)


def _log_per_dim_stats(split_label, field_label, data, num_dims):
    """Log mean/std/min/max of every trailing-axis channel of a padded array."""
    for dim_ind in range(num_dims):
        channel = data[:, :, dim_ind]
        logging.info(
            f"{split_label} - {field_label} dim {dim_ind+1} mean {np.mean(channel)} std {np.std(channel)} min {np.min(channel)} max {np.max(channel)}"
        )


def make_dataset(
    load_path,
    save_dir,
    save_name_prefix,
    val_split,
    normalize,
    max_episodes=-1,
    cameras=None,
):
    """Convert a robomimic HDF5 file into padded train/val files.

    Every episode is zero-padded to the longest episode length; the true
    lengths are stored under the "traj_length" key so the padding can be
    sliced away when the dataset is loaded.

    Args:
        load_path: path of the robomimic HDF5 file. If it contains the
            substring "transport", the second robot's low-dim keys are
            also used.
        save_dir: output directory (created if missing).
        save_name_prefix: prefix prepended to every output file name.
        val_split: fraction of episodes held out for validation.
        normalize: if True, rescale observations and actions with the
            collected min/max and save those statistics next to the data.
        max_episodes: if > 0, keep only the first `max_episodes` demos (in
            increasing episode order).
        cameras: camera names whose "<camera>_image" observations are
            concatenated along the last axis; None selects state-only mode,
            which adds the "object" observation instead.

    Raises:
        ValueError: if no demonstration is selected, if none of the
            requested cameras exists, or if the selected camera images do
            not share height and width.
    """
    use_images = cameras is not None

    # Load hdf5 file from load_path
    with h5py.File(load_path, "r") as f:
        # put demonstration list in increasing episode order; the numeric
        # index follows a 5-character prefix (e.g. "demo_0")
        demos = sorted(f["data"].keys(), key=lambda name: int(name[5:]))
        if max_episodes > 0:
            demos = demos[:max_episodes]
        if not demos:
            raise ValueError(f"No demonstrations found in {load_path}")
        first_demo = demos[0]

        # From generate_paper_configs.py: default observation is eef pose,
        # gripper finger position, and object information, all of which are
        # low-dim.
        low_dim_obs_names = [
            "robot0_eef_pos",
            "robot0_eef_quat",
            "robot0_gripper_qpos",
        ]
        if "transport" in load_path:
            low_dim_obs_names += [
                "robot1_eef_pos",
                "robot1_eef_quat",
                "robot1_gripper_qpos",
            ]
        if not use_images:  # state-only
            low_dim_obs_names.append("object")

        obs_dim = 0
        for low_dim_obs_name in low_dim_obs_names:
            dim = f[f"data/{first_demo}/obs/{low_dim_obs_name}"].shape[1]
            obs_dim += dim
            logging.info(f"Using {low_dim_obs_name} with dim {dim} for observation")
        action_dim = f[f"data/{first_demo}/actions"].shape[1]
        logging.info(f"Total low-dim observation dim: {obs_dim}")
        logging.info(f"Action dim: {action_dim}")

        # get basic stats
        # NOTE(review): the running min/max start from zero, so a reported
        # minimum is never above 0 and a maximum never below 0, even when all
        # samples of a dimension lie on one side of zero. Kept unchanged
        # because the saved normalization statistics depend on it -- confirm
        # whether this is intended.
        traj_lengths = []
        obs_min = np.zeros((obs_dim))
        obs_max = np.zeros((obs_dim))
        action_min = np.zeros((action_dim))
        action_max = np.zeros((action_dim))
        for ep in demos:
            traj_lengths.append(f[f"data/{ep}/actions"].shape[0])
            obs = np.hstack(
                [f[f"data/{ep}/obs/{name}"][()] for name in low_dim_obs_names]
            )
            actions = f[f"data/{ep}/actions"][()]
            obs_min = np.minimum(obs_min, np.min(obs, axis=0))
            obs_max = np.maximum(obs_max, np.max(obs, axis=0))
            action_min = np.minimum(action_min, np.min(actions, axis=0))
            action_max = np.maximum(action_max, np.max(actions, axis=0))
        traj_lengths = np.array(traj_lengths)
        max_traj_length = np.max(traj_lengths)

        # report statistics on the data
        logging.info("===== Basic stats =====")
        logging.info("total transitions: {}".format(np.sum(traj_lengths)))
        logging.info("total trajectories: {}".format(traj_lengths.shape[0]))
        logging.info(
            f"traj length mean/std: {np.mean(traj_lengths)}, {np.std(traj_lengths)}"
        )
        logging.info(
            f"traj length min/max: {np.min(traj_lengths)}, {np.max(traj_lengths)}"
        )
        logging.info(f"obs min: {obs_min}")
        logging.info(f"obs max: {obs_max}")
        logging.info(f"action min: {action_min}")
        logging.info(f"action max: {action_max}")

        # deal with images
        if use_images:
            img_shapes = []
            img_names = []
            first_obs_group = f[f"data/{first_demo}/obs"]
            for camera in cameras:
                img_name = f"{camera}_image"
                if img_name in first_obs_group:
                    img_shapes.append(first_obs_group[img_name].shape[1:])
                    img_names.append(img_name)
                else:
                    logging.warning(f"Skipping camera without image data: {camera}")
            if not img_names:
                raise ValueError(
                    f"None of the requested cameras {list(cameras)} has image "
                    f"observations in {load_path}"
                )

            # ensure all images have the same height and width
            if any(
                tuple(shape[:2]) != tuple(img_shapes[0][:2]) for shape in img_shapes
            ):
                raise ValueError(
                    f"Camera images must share height and width, got {img_shapes}"
                )
            combined_img_shape = (
                img_shapes[0][0],
                img_shapes[0][1],
                sum(shape[2] for shape in img_shapes),
            )
            logging.info(f"Image shapes: {img_shapes}")

        # split indices in train and val (a set gives O(1) membership tests)
        num_traj = len(traj_lengths)
        num_train = int(num_traj * (1 - val_split))
        train_indices = set(random.sample(range(num_traj), k=num_train))

        # do over all indices
        keys = ["observations", "actions", "rewards"]
        if use_images:
            keys.append("images")
        out_train = {
            "observations": np.empty((0, max_traj_length, obs_dim)),
            "actions": np.empty((0, max_traj_length, action_dim)),
            "rewards": np.empty((0, max_traj_length)),
            "traj_length": [],
        }
        if use_images:
            out_train["images"] = np.empty(
                (0, max_traj_length, *combined_img_shape),
                dtype=np.uint8,
            )
        out_val = deepcopy(out_train)

        # Padded episodes are collected in lists and joined once after the
        # loop, instead of re-copying the growing arrays for every episode.
        train_chunks = {key: [] for key in keys}
        val_chunks = {key: [] for key in keys}
        train_episode_reward_all = []
        val_episode_reward_all = []
        for i, ep in enumerate(tqdm(demos)):
            is_train = i in train_indices
            out = out_train if is_train else out_val
            chunks = train_chunks if is_train else val_chunks

            # get episode length
            traj_length = f[f"data/{ep}"].attrs["num_samples"]
            out["traj_length"].append(traj_length)

            # extract
            raw_actions = f[f"data/{ep}/actions"][()]
            rewards = f[f"data/{ep}/rewards"][()]
            raw_obs = np.hstack(
                [f[f"data/{ep}/obs/{name}"][()] for name in low_dim_obs_names]
            )  # not normalized

            # scale to [-1, 1] for both ob and action
            if normalize:
                obs = 2 * (raw_obs - obs_min) / (obs_max - obs_min + 1e-6) - 1
                actions = (
                    2 * (raw_actions - action_min) / (action_max - action_min + 1e-6)
                    - 1
                )
            else:
                obs = raw_obs
                actions = raw_actions

            data_traj = {
                "observations": obs,
                "actions": actions,
                "rewards": rewards,
            }
            if use_images:  # no normalization
                data_traj["images"] = np.concatenate(
                    [f[f"data/{ep}/obs/{img_name}"][()] for img_name in img_names],
                    axis=-1,
                )

            # apply padding to make all episodes have the same max steps
            # later when we load this dataset, we will use the traj_length to
            # slice the data
            for key in keys:
                chunks[key].append(_pad_trajectory(data_traj[key], max_traj_length))

            # check reward
            episode_reward = np.sum(data_traj["rewards"])
            if is_train:
                train_episode_reward_all.append(episode_reward)
            else:
                val_episode_reward_all.append(episode_reward)

    # Join the collected episodes onto the empty seed arrays; concatenating
    # with the seed keeps the same dtype promotion as stacking one by one.
    for out, chunks in ((out_train, train_chunks), (out_val, val_chunks)):
        for key in keys:
            if chunks[key]:
                out[key] = np.concatenate(
                    [out[key], *(chunk[None] for chunk in chunks[key])],
                    axis=0,
                )
                chunks[key].clear()

    # Save to np file
    # NOTE: despite the ".npz" extension these two files are written with
    # pickle.dump, not np.savez.
    os.makedirs(save_dir, exist_ok=True)
    save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
    save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
    with open(save_train_path, "wb") as out_file:
        pickle.dump(out_train, out_file)
    with open(save_val_path, "wb") as out_file:
        pickle.dump(out_val, out_file)
    if normalize:
        normalization_save_path = os.path.join(
            save_dir, save_name_prefix + "normalization.npz"
        )
        np.savez(
            normalization_save_path,
            obs_min=obs_min,
            obs_max=obs_max,
            action_min=action_min,
            action_max=action_max,
        )

    # debug
    logging.info("\n========== Final ===========")
    logging.info(
        f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}"
    )
    logging.info(
        f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}"
    )
    logging.info(
        f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}"
    )
    logging.info(
        f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}"
    )
    logging.info(
        f"Train - Mean/Std episode reward: {np.mean(train_episode_reward_all)}, {np.std(train_episode_reward_all)}"
    )
    if val_split > 0:
        logging.info(
            f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}"
        )
        logging.info(
            f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}"
        )
        logging.info(
            f"Val - Mean/Std episode reward: {np.mean(val_episode_reward_all)}, {np.std(val_episode_reward_all)}"
        )

    # Per-dimension statistics are taken over the padded arrays, so the zero
    # padding is part of the reported numbers.
    _log_per_dim_stats("Train", "Obs", out_train["observations"], obs_dim)
    _log_per_dim_stats("Train", "Action", out_train["actions"], action_dim)
    if val_split > 0:
        _log_per_dim_stats("Val", "Obs", out_val["observations"], obs_dim)
        _log_per_dim_stats("Val", "Action", out_val["actions"], action_dim)


def main():
    """Parse the command line, set up file+console logging, build the dataset."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--load_path", type=str, default=".")
    parser.add_argument("--save_dir", type=str, default=".")
    parser.add_argument("--save_name_prefix", type=str, default="")
    parser.add_argument("--val_split", type=float, default=0.2)
    parser.add_argument("--max_episodes", type=int, default=-1)
    parser.add_argument("--normalize", action="store_true")
    parser.add_argument("--cameras", nargs="*", default=None)
    args = parser.parse_args()

    if args.max_episodes > 0:
        args.save_name_prefix += f"max_episodes_{args.max_episodes}_"

    os.makedirs(args.save_dir, exist_ok=True)
    log_path = os.path.join(
        args.save_dir,
        args.save_name_prefix
        + f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
    )
    logging.basicConfig(
        level=logging.INFO,
        format="%(message)s",
        handlers=[
            logging.FileHandler(log_path, mode="w"),
            logging.StreamHandler(),
        ],
    )

    make_dataset(
        args.load_path,
        args.save_dir,
        args.save_name_prefix,
        args.val_split,
        args.normalize,
        max_episodes=args.max_episodes,
        cameras=args.cameras,
    )


if __name__ == "__main__":
    main()