diff --git a/script/dataset/README.md b/script/dataset/README.md index 0b92d8a..126879e 100644 --- a/script/dataset/README.md +++ b/script/dataset/README.md @@ -15,4 +15,10 @@ python download_datasets.py --tasks all --dataset_types mh --hdf5_types low_dim python download_datasets.py --tasks all --dataset_types mh --hdf5_types raw # pixel-based policy # for pixel, replay the trajectories to extract image observations python robomimic/scripts/dataset_states_to_obs.py --done_mode 2 --dataset datasets/can/mh/demo_v141.hdf5 --output_name image_v141.hdf5 --camera_names robot0_eye_in_hand --camera_height 96 --camera_width 96 --exclude-next-obs --n 100 +``` + +D3IL data: first download the raw data from [D3IL](https://github.com/ALRhub/d3il), see the Google Drive link +```console +python script/dataset/process_d3il_dataset.py --load_path= --env_type=avoid # save all data +python script/dataset/filter_d3il_avoid_data.py --load_path= --desired_modes ... --required_modes ... # filter modes ``` \ No newline at end of file diff --git a/script/dataset/filter_d3il_avoid_data.py b/script/dataset/filter_d3il_avoid_data.py index 16547d7..5467ee6 100644 --- a/script/dataset/filter_d3il_avoid_data.py +++ b/script/dataset/filter_d3il_avoid_data.py @@ -7,7 +7,6 @@ Trajectories are normalized with filtered data, not the original data. import os import numpy as np from tqdm import tqdm -import pickle import random import matplotlib.pyplot as plt from copy import deepcopy @@ -91,10 +90,8 @@ def make_dataset( full_traj_lengths = np.array(full_traj_lengths) # take the max and min of obs and action - obs_min = np.zeros((obs_dim)) - obs_max = np.zeros((obs_dim)) - action_min = np.zeros((action_dim)) - action_max = np.zeros((action_dim)) + obs_all = np.empty((0, obs_dim)) + action_all = np.empty((0, action_dim)) chosen_indices = [] for i in tqdm(range(len(masks))): T = full_traj_lengths[i] @@ -125,21 +122,18 @@ def make_dataset( continue chosen_indices.append(i) - obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0) - obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0) - action_min = np.min( - np.vstack((action_min, np.min(action_traj, axis=0))), axis=0 - ) - action_max = np.max( - np.vstack((action_max, np.max(action_traj, axis=0))), axis=0 - ) + obs_all = np.vstack((obs_all, obs_traj)) + action_all = np.vstack((action_all, action_traj)) + obs_min = np.min(obs_all, axis=0) + obs_max = np.max(obs_all, axis=0) + action_min = np.min(action_all, axis=0) + action_max = np.max(action_all, axis=0) if len(chosen_indices) == 0: raise ValueError("No data found for the desired/required modes") chosen_indices = np.array(chosen_indices) traj_lengths = full_traj_lengths[chosen_indices] actions = demo_dataset.actions[chosen_indices] obs = demo_dataset.observations[chosen_indices] - max_traj_length = np.max(traj_lengths) # split indices in train and val num_traj = len(traj_lengths) @@ -159,17 +153,13 @@ def make_dataset( logger.info(f"action max: {action_max}") # do over all indices - out_train = {} - keys = [ - "observations", - "actions", - "rewards", - ] - total_timesteps = actions.shape[1] - out_train["observations"] = np.empty((0, total_timesteps, obs_dim)) - out_train["actions"] = np.empty((0, total_timesteps, action_dim)) - out_train["rewards"] = np.empty((0, total_timesteps)) - out_train["traj_length"] = [] + out_train = { + "states": [], + "actions": [], + "rewards": [], + "terminals": [], + "traj_lengths": [], + } out_val = deepcopy(out_train) for i in tqdm(range(len(traj_lengths))): if i in train_indices: @@ -177,8 +167,8 @@ def make_dataset( else: out = out_val T = traj_lengths[i] - obs_traj = obs[i].numpy() - action_traj = actions[i].numpy() + obs_traj = obs[i, :T].numpy() + action_traj = actions[i, :T].numpy() # scale to [-1, 1] for both ob and action obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1 @@ -186,20 +176,21 @@ def make_dataset( 2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1 ) - # get episode length - traj_length = T - out["traj_length"].append(traj_length) + out["states"].append(obs_traj) + out["actions"].append(action_traj) + out["rewards"].append(np.zeros(T)) # no reward from d3il dataset + out["terminals"].append( + np.zeros(T, dtype=bool) + ) # no terminal from d3il dataset + out["traj_lengths"].append(T) - # extract - rewards = np.zeros(total_timesteps) # no reward from d3il dataset - data_traj = { - "observations": obs_traj, - "actions": action_traj, - "rewards": rewards, - } - for key in keys: - traj = data_traj[key] - out[key] = np.vstack((out[key], traj[None])) + # Concatenate trajectories + for key in ["states", "actions", "rewards"]: + out_train[key] = np.concatenate(out_train[key], axis=0) + + # Only concatenate validation set if it exists + if val_split > 0: + out_val[key] = np.concatenate(out_val[key], axis=0) # plot all trajectories and save in a figure def plot(out, name): @@ -219,14 +210,16 @@ def make_dataset( pillar_xys = get_obj_xy_list() fig = plt.figure() - all_trajs = out["observations"] # num x timestep x obs - for traj, traj_length in zip(all_trajs, out["traj_length"]): + prev_index = 0 + for traj_length in out["traj_lengths"]: + traj = out["states"][prev_index : traj_length + prev_index].copy() # unnormalize traj = (traj + 1) / 2 # [-1, 1] -> [0, 1] traj = traj * (obs_max - obs_min) + obs_min plt.plot( traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3) ) + prev_index += traj_length plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-") for xy in pillar_xys: circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True) @@ -247,10 +240,20 @@ def make_dataset( # Save to np file save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz") save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz") - with open(save_train_path, "wb") as f: - pickle.dump(out_train, f) - with open(save_val_path, "wb") as f: - pickle.dump(out_val, f) + np.savez_compressed( + save_train_path, + states=np.array(out_train["states"]), + actions=np.array(out_train["actions"]), + rewards=np.array(out_train["rewards"]), + traj_lengths=np.array(out_train["traj_lengths"]), + ) + np.savez_compressed( + save_val_path, + states=np.array(out_val["states"]), + actions=np.array(out_val["actions"]), + rewards=np.array(out_val["rewards"]), + traj_lengths=np.array(out_val["traj_lengths"]), + ) normalization_save_path = os.path.join( save_dir, save_name_prefix + "normalization.npz" ) @@ -265,42 +268,42 @@ def make_dataset( # debug logger.info("\n========== Final ===========") logger.info( - f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}" + f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}" ) logger.info( - f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}" + f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}" ) logger.info( - f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}" + f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}" ) logger.info( - f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}" + f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}" ) if val_split > 0: logger.info( - f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}" + f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}" ) logger.info( - f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}" + f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}" ) for obs_dim_ind in range(obs_dim): - obs = out_train["observations"][:, :, obs_dim_ind] + obs = out_train["states"][:, obs_dim_ind] logger.info( f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" ) for action_dim_ind in range(action_dim): - action = out_train["actions"][:, :, action_dim_ind] + action = out_train["actions"][:, action_dim_ind] logger.info( f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" ) if val_split > 0: for obs_dim_ind in range(obs_dim): - obs = out_val["observations"][:, :, obs_dim_ind] + obs = out_val["states"][:, obs_dim_ind] logger.info( f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" ) for action_dim_ind in range(action_dim): - action = out_val["actions"][:, :, action_dim_ind] + action = out_val["actions"][:, action_dim_ind] logger.info( f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" ) @@ -335,7 +338,7 @@ if __name__ == "__main__": args.save_name_prefix + f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log", ) - logger = logging.getLogger("get_D4RL_dataset") + logger = logging.getLogger("filter_d3il_avoid_data") logger.setLevel(logging.INFO) file_handler = logging.FileHandler(log_path) file_handler.setLevel(logging.INFO) # Set the minimum level for this handler diff --git a/script/dataset/process_d3il_dataset.py b/script/dataset/process_d3il_dataset.py index e95a968..3cbe18f 100644 --- a/script/dataset/process_d3il_dataset.py +++ b/script/dataset/process_d3il_dataset.py @@ -5,7 +5,6 @@ Process d3il dataset and save it into our custom format so it can be loaded for import os import numpy as np from tqdm import tqdm -import pickle import random import matplotlib.pyplot as plt from copy import deepcopy @@ -68,7 +67,6 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): for ep in range(masks.shape[0]): traj_lengths.append(int(masks[ep].sum().item())) traj_lengths = np.array(traj_lengths) - max_traj_length = np.max(traj_lengths) # split indices in train and val num_traj = len(traj_lengths) @@ -76,22 +74,18 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): train_indices = random.sample(range(num_traj), k=num_train) # take the max and min of obs and action - obs_min = np.zeros((obs_dim)) - obs_max = np.zeros((obs_dim)) - action_min = np.zeros((action_dim)) - action_max = np.zeros((action_dim)) + obs_all = np.empty((0, obs_dim)) + action_all = np.empty((0, action_dim)) for i in tqdm(range(len(traj_lengths))): T = traj_lengths[i] obs_traj = obs[i, :T].numpy() action_traj = actions[i, :T].numpy() - obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0) - obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0) - action_min = np.min( - np.vstack((action_min, np.min(action_traj, axis=0))), axis=0 - ) - action_max = np.max( - np.vstack((action_max, np.max(action_traj, axis=0))), axis=0 - ) + obs_all = np.vstack((obs_all, obs_traj)) + action_all = np.vstack((action_all, action_traj)) + obs_min = np.min(obs_all, axis=0) + obs_max = np.max(obs_all, axis=0) + action_min = np.min(action_all, axis=0) + action_max = np.max(action_all, axis=0) logger.info("\n========== Basic Info ===========") logger.info("total transitions: {}".format(np.sum(traj_lengths))) logger.info("total trajectories: {}".format(len(traj_lengths))) @@ -105,17 +99,13 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): logger.info(f"action max: {action_max}") # do over all indices - out_train = {} - keys = [ - "observations", - "actions", - "rewards", - ] - total_timesteps = actions.shape[1] - out_train["observations"] = np.empty((0, total_timesteps, obs_dim)) - out_train["actions"] = np.empty((0, total_timesteps, action_dim)) - out_train["rewards"] = np.empty((0, total_timesteps)) - out_train["traj_length"] = [] + out_train = { + "states": [], + "actions": [], + "rewards": [], + "terminals": [], + "traj_lengths": [], + } out_val = deepcopy(out_train) for i in tqdm(range(len(traj_lengths))): if i in train_indices: @@ -124,8 +114,8 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): out = out_val T = traj_lengths[i] - obs_traj = obs[i].numpy() - action_traj = actions[i].numpy() + obs_traj = obs[i, :T].numpy() + action_traj = actions[i, :T].numpy() # scale to [-1, 1] for both ob and action obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1 @@ -133,20 +123,21 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): 2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1 ) - # get episode length - traj_length = T - out["traj_length"].append(traj_length) + out["states"].append(obs_traj) + out["actions"].append(action_traj) + out["rewards"].append(np.zeros(T)) # no reward from d3il dataset + out["terminals"].append( + np.zeros(T, dtype=bool) + ) # no terminal from d3il dataset + out["traj_lengths"].append(T) - # extract - rewards = np.zeros(total_timesteps) # no reward from d3il dataset - data_traj = { - "observations": obs_traj, - "actions": action_traj, - "rewards": rewards, - } - for key in keys: - traj = data_traj[key] - out[key] = np.vstack((out[key], traj[None])) + # Concatenate trajectories + for key in ["states", "actions", "rewards"]: + out_train[key] = np.concatenate(out_train[key], axis=0) + + # Only concatenate validation set if it exists + if val_split > 0: + out_val[key] = np.concatenate(out_val[key], axis=0) # plot all trajectories and save in a figure def plot(out, name): @@ -166,14 +157,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): pillar_xys = get_obj_xy_list() fig = plt.figure() - all_trajs = out["observations"] # num x timestep x obs - for traj, traj_length in zip(all_trajs, out["traj_length"]): + prev_index = 0 + for traj_length in out["traj_lengths"]: + traj = out["states"][prev_index : traj_length + prev_index].copy() # unnormalize traj = (traj + 1) / 2 # [-1, 1] -> [0, 1] traj = traj * (obs_max - obs_min) + obs_min plt.plot( traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3) ) + prev_index += traj_length plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-") for xy in pillar_xys: circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True) @@ -194,10 +187,20 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): # Save to np file save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz") save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz") - with open(save_train_path, "wb") as f: - pickle.dump(out_train, f) - with open(save_val_path, "wb") as f: - pickle.dump(out_val, f) + np.savez_compressed( + save_train_path, + states=out_train["states"], + actions=out_train["actions"], + rewards=out_train["rewards"], + traj_lengths=out_train["traj_lengths"], + ) + np.savez_compressed( + save_val_path, + states=out_val["states"], + actions=out_val["actions"], + rewards=out_val["rewards"], + traj_lengths=out_val["traj_lengths"], + ) normalization_save_path = os.path.join( save_dir, save_name_prefix + "normalization.npz" ) @@ -212,42 +215,42 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split): # debug logger.info("\n========== Final ===========") logger.info( - f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}" + f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}" ) logger.info( - f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}" + f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}" ) logger.info( - f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}" + f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}" ) logger.info( - f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}" + f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}" ) if val_split > 0: logger.info( - f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}" + f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}" ) logger.info( - f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}" + f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}" ) for obs_dim_ind in range(obs_dim): - obs = out_train["observations"][:, :, obs_dim_ind] + obs = out_train["states"][:, obs_dim_ind] logger.info( f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" ) for action_dim_ind in range(action_dim): - action = out_train["actions"][:, :, action_dim_ind] + action = out_train["actions"][:, action_dim_ind] logger.info( f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" ) if val_split > 0: for obs_dim_ind in range(obs_dim): - obs = out_val["observations"][:, :, obs_dim_ind] + obs = out_val["states"][:, obs_dim_ind] logger.info( f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" ) for action_dim_ind in range(action_dim): - action = out_val["actions"][:, :, action_dim_ind] + action = out_val["actions"][:, action_dim_ind] logger.info( f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" ) @@ -275,7 +278,7 @@ if __name__ == "__main__": args.save_name_prefix + f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log", ) - logger = logging.getLogger("get_D4RL_dataset") + logger = logging.getLogger("process_d3il_dataset") logger.setLevel(logging.INFO) file_handler = logging.FileHandler(log_path) file_handler.setLevel(logging.INFO) # Set the minimum level for this handler diff --git a/script/dataset/process_robomimic_dataset.py b/script/dataset/process_robomimic_dataset.py index 2abb2a6..098b280 100644 --- a/script/dataset/process_robomimic_dataset.py +++ b/script/dataset/process_robomimic_dataset.py @@ -78,7 +78,6 @@ robomimic dataset normalizes action to [-1, 1], observation roughly? to [-1, 1]. """ - import numpy as np from tqdm import tqdm import h5py @@ -127,10 +126,10 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize): # Initialize variables for tracking trajectory statistics traj_lengths = [] - obs_min = np.zeros((obs_dim)) - obs_max = np.zeros((obs_dim)) - action_min = np.zeros((action_dim)) - action_max = np.zeros((action_dim)) + obs_min = np.inf * np.ones((obs_dim)) + obs_max = -np.inf * np.ones((obs_dim)) + action_min = np.inf * np.ones((action_dim)) + action_max = -np.inf * np.ones((action_dim)) # Process each demo for ep in demos: @@ -256,6 +255,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize): logging.info( f"Val - Trajectories: {len(out_val['traj_lengths'])}, Transitions: {np.sum(out_val['traj_lengths'])}" ) + for obs_dim_ind in range(obs_dim): + obs = out_train["states"][:, obs_dim_ind] + logging.info( + f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" + ) + for action_dim_ind in range(action_dim): + action = out_train["actions"][:, action_dim_ind] + logging.info( + f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" + ) if __name__ == "__main__":