update D3IL pre-processing, fix normalization bug in robomimic pre-processing

This commit is contained in:
allenzren 2024-11-08 18:40:42 -05:00
parent 0bdae945e9
commit 7d1b3a236f
4 changed files with 140 additions and 119 deletions

View File

@ -16,3 +16,9 @@ python download_datasets.py --tasks all --dataset_types mh --hdf5_types raw # pi
# for pixel, replay the trajectories to extract image observations # for pixel, replay the trajectories to extract image observations
python robomimic/scripts/dataset_states_to_obs.py --done_mode 2 --dataset datasets/can/mh/demo_v141.hdf5 --output_name image_v141.hdf5 --camera_names robot0_eye_in_hand --camera_height 96 --camera_width 96 --exclude-next-obs --n 100 python robomimic/scripts/dataset_states_to_obs.py --done_mode 2 --dataset datasets/can/mh/demo_v141.hdf5 --output_name image_v141.hdf5 --camera_names robot0_eye_in_hand --camera_height 96 --camera_width 96 --exclude-next-obs --n 100
``` ```
D3IL data: first download the raw data from [D3IL](https://github.com/ALRhub/d3il) (see the Google Drive link in that repository), then run the pre-processing scripts:
```console
python script/dataset/process_d3il_dataset.py --load_path=<avoid_data_path> --env_type=avoid # save all data
python script/dataset/filter_d3il_avoid_data.py --load_path=<avoid_data_path> --desired_modes ... --required_modes ... # filter modes
```

View File

@ -7,7 +7,6 @@ Trajectories are normalized with filtered data, not the original data.
import os import os
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import pickle
import random import random
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from copy import deepcopy from copy import deepcopy
@ -91,10 +90,8 @@ def make_dataset(
full_traj_lengths = np.array(full_traj_lengths) full_traj_lengths = np.array(full_traj_lengths)
# take the max and min of obs and action # take the max and min of obs and action
obs_min = np.zeros((obs_dim)) obs_all = np.empty((0, obs_dim))
obs_max = np.zeros((obs_dim)) action_all = np.empty((0, action_dim))
action_min = np.zeros((action_dim))
action_max = np.zeros((action_dim))
chosen_indices = [] chosen_indices = []
for i in tqdm(range(len(masks))): for i in tqdm(range(len(masks))):
T = full_traj_lengths[i] T = full_traj_lengths[i]
@ -125,21 +122,18 @@ def make_dataset(
continue continue
chosen_indices.append(i) chosen_indices.append(i)
obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0) obs_all = np.vstack((obs_all, obs_traj))
obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0) action_all = np.vstack((action_all, action_traj))
action_min = np.min( obs_min = np.min(obs_all, axis=0)
np.vstack((action_min, np.min(action_traj, axis=0))), axis=0 obs_max = np.max(obs_all, axis=0)
) action_min = np.min(action_all, axis=0)
action_max = np.max( action_max = np.max(action_all, axis=0)
np.vstack((action_max, np.max(action_traj, axis=0))), axis=0
)
if len(chosen_indices) == 0: if len(chosen_indices) == 0:
raise ValueError("No data found for the desired/required modes") raise ValueError("No data found for the desired/required modes")
chosen_indices = np.array(chosen_indices) chosen_indices = np.array(chosen_indices)
traj_lengths = full_traj_lengths[chosen_indices] traj_lengths = full_traj_lengths[chosen_indices]
actions = demo_dataset.actions[chosen_indices] actions = demo_dataset.actions[chosen_indices]
obs = demo_dataset.observations[chosen_indices] obs = demo_dataset.observations[chosen_indices]
max_traj_length = np.max(traj_lengths)
# split indices in train and val # split indices in train and val
num_traj = len(traj_lengths) num_traj = len(traj_lengths)
@ -159,17 +153,13 @@ def make_dataset(
logger.info(f"action max: {action_max}") logger.info(f"action max: {action_max}")
# do over all indices # do over all indices
out_train = {} out_train = {
keys = [ "states": [],
"observations", "actions": [],
"actions", "rewards": [],
"rewards", "terminals": [],
] "traj_lengths": [],
total_timesteps = actions.shape[1] }
out_train["observations"] = np.empty((0, total_timesteps, obs_dim))
out_train["actions"] = np.empty((0, total_timesteps, action_dim))
out_train["rewards"] = np.empty((0, total_timesteps))
out_train["traj_length"] = []
out_val = deepcopy(out_train) out_val = deepcopy(out_train)
for i in tqdm(range(len(traj_lengths))): for i in tqdm(range(len(traj_lengths))):
if i in train_indices: if i in train_indices:
@ -177,8 +167,8 @@ def make_dataset(
else: else:
out = out_val out = out_val
T = traj_lengths[i] T = traj_lengths[i]
obs_traj = obs[i].numpy() obs_traj = obs[i, :T].numpy()
action_traj = actions[i].numpy() action_traj = actions[i, :T].numpy()
# scale to [-1, 1] for both ob and action # scale to [-1, 1] for both ob and action
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1 obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
@ -186,20 +176,21 @@ def make_dataset(
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1 2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
) )
# get episode length out["states"].append(obs_traj)
traj_length = T out["actions"].append(action_traj)
out["traj_length"].append(traj_length) out["rewards"].append(np.zeros(T)) # no reward from d3il dataset
out["terminals"].append(
np.zeros(T, dtype=bool)
) # no terminal from d3il dataset
out["traj_lengths"].append(T)
# extract # Concatenate trajectories
rewards = np.zeros(total_timesteps) # no reward from d3il dataset for key in ["states", "actions", "rewards"]:
data_traj = { out_train[key] = np.concatenate(out_train[key], axis=0)
"observations": obs_traj,
"actions": action_traj, # Only concatenate validation set if it exists
"rewards": rewards, if val_split > 0:
} out_val[key] = np.concatenate(out_val[key], axis=0)
for key in keys:
traj = data_traj[key]
out[key] = np.vstack((out[key], traj[None]))
# plot all trajectories and save in a figure # plot all trajectories and save in a figure
def plot(out, name): def plot(out, name):
@ -219,14 +210,16 @@ def make_dataset(
pillar_xys = get_obj_xy_list() pillar_xys = get_obj_xy_list()
fig = plt.figure() fig = plt.figure()
all_trajs = out["observations"] # num x timestep x obs prev_index = 0
for traj, traj_length in zip(all_trajs, out["traj_length"]): for traj_length in out["traj_lengths"]:
traj = out["states"][prev_index : traj_length + prev_index].copy()
# unnormalize # unnormalize
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1] traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
traj = traj * (obs_max - obs_min) + obs_min traj = traj * (obs_max - obs_min) + obs_min
plt.plot( plt.plot(
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3) traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
) )
prev_index += traj_length
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-") plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
for xy in pillar_xys: for xy in pillar_xys:
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True) circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
@ -247,10 +240,20 @@ def make_dataset(
# Save to np file # Save to np file
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz") save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz") save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
with open(save_train_path, "wb") as f: np.savez_compressed(
pickle.dump(out_train, f) save_train_path,
with open(save_val_path, "wb") as f: states=np.array(out_train["states"]),
pickle.dump(out_val, f) actions=np.array(out_train["actions"]),
rewards=np.array(out_train["rewards"]),
traj_lengths=np.array(out_train["traj_lengths"]),
)
np.savez_compressed(
save_val_path,
states=np.array(out_val["states"]),
actions=np.array(out_val["actions"]),
rewards=np.array(out_val["rewards"]),
traj_lengths=np.array(out_val["traj_lengths"]),
)
normalization_save_path = os.path.join( normalization_save_path = os.path.join(
save_dir, save_name_prefix + "normalization.npz" save_dir, save_name_prefix + "normalization.npz"
) )
@ -265,42 +268,42 @@ def make_dataset(
# debug # debug
logger.info("\n========== Final ===========") logger.info("\n========== Final ===========")
logger.info( logger.info(
f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}" f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}"
) )
logger.info( logger.info(
f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}" f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}"
) )
logger.info( logger.info(
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}" f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}"
) )
logger.info( logger.info(
f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}" f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}"
) )
if val_split > 0: if val_split > 0:
logger.info( logger.info(
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}" f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}"
) )
logger.info( logger.info(
f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}" f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}"
) )
for obs_dim_ind in range(obs_dim): for obs_dim_ind in range(obs_dim):
obs = out_train["observations"][:, :, obs_dim_ind] obs = out_train["states"][:, obs_dim_ind]
logger.info( logger.info(
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
) )
for action_dim_ind in range(action_dim): for action_dim_ind in range(action_dim):
action = out_train["actions"][:, :, action_dim_ind] action = out_train["actions"][:, action_dim_ind]
logger.info( logger.info(
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
) )
if val_split > 0: if val_split > 0:
for obs_dim_ind in range(obs_dim): for obs_dim_ind in range(obs_dim):
obs = out_val["observations"][:, :, obs_dim_ind] obs = out_val["states"][:, obs_dim_ind]
logger.info( logger.info(
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
) )
for action_dim_ind in range(action_dim): for action_dim_ind in range(action_dim):
action = out_val["actions"][:, :, action_dim_ind] action = out_val["actions"][:, action_dim_ind]
logger.info( logger.info(
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
) )
@ -335,7 +338,7 @@ if __name__ == "__main__":
args.save_name_prefix args.save_name_prefix
+ f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log", + f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
) )
logger = logging.getLogger("get_D4RL_dataset") logger = logging.getLogger("filter_d3il_avoid_data")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
file_handler = logging.FileHandler(log_path) file_handler = logging.FileHandler(log_path)
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler file_handler.setLevel(logging.INFO) # Set the minimum level for this handler

View File

@ -5,7 +5,6 @@ Process d3il dataset and save it into our custom format so it can be loaded for
import os import os
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import pickle
import random import random
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from copy import deepcopy from copy import deepcopy
@ -68,7 +67,6 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
for ep in range(masks.shape[0]): for ep in range(masks.shape[0]):
traj_lengths.append(int(masks[ep].sum().item())) traj_lengths.append(int(masks[ep].sum().item()))
traj_lengths = np.array(traj_lengths) traj_lengths = np.array(traj_lengths)
max_traj_length = np.max(traj_lengths)
# split indices in train and val # split indices in train and val
num_traj = len(traj_lengths) num_traj = len(traj_lengths)
@ -76,22 +74,18 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
train_indices = random.sample(range(num_traj), k=num_train) train_indices = random.sample(range(num_traj), k=num_train)
# take the max and min of obs and action # take the max and min of obs and action
obs_min = np.zeros((obs_dim)) obs_all = np.empty((0, obs_dim))
obs_max = np.zeros((obs_dim)) action_all = np.empty((0, action_dim))
action_min = np.zeros((action_dim))
action_max = np.zeros((action_dim))
for i in tqdm(range(len(traj_lengths))): for i in tqdm(range(len(traj_lengths))):
T = traj_lengths[i] T = traj_lengths[i]
obs_traj = obs[i, :T].numpy() obs_traj = obs[i, :T].numpy()
action_traj = actions[i, :T].numpy() action_traj = actions[i, :T].numpy()
obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0) obs_all = np.vstack((obs_all, obs_traj))
obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0) action_all = np.vstack((action_all, action_traj))
action_min = np.min( obs_min = np.min(obs_all, axis=0)
np.vstack((action_min, np.min(action_traj, axis=0))), axis=0 obs_max = np.max(obs_all, axis=0)
) action_min = np.min(action_all, axis=0)
action_max = np.max( action_max = np.max(action_all, axis=0)
np.vstack((action_max, np.max(action_traj, axis=0))), axis=0
)
logger.info("\n========== Basic Info ===========") logger.info("\n========== Basic Info ===========")
logger.info("total transitions: {}".format(np.sum(traj_lengths))) logger.info("total transitions: {}".format(np.sum(traj_lengths)))
logger.info("total trajectories: {}".format(len(traj_lengths))) logger.info("total trajectories: {}".format(len(traj_lengths)))
@ -105,17 +99,13 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
logger.info(f"action max: {action_max}") logger.info(f"action max: {action_max}")
# do over all indices # do over all indices
out_train = {} out_train = {
keys = [ "states": [],
"observations", "actions": [],
"actions", "rewards": [],
"rewards", "terminals": [],
] "traj_lengths": [],
total_timesteps = actions.shape[1] }
out_train["observations"] = np.empty((0, total_timesteps, obs_dim))
out_train["actions"] = np.empty((0, total_timesteps, action_dim))
out_train["rewards"] = np.empty((0, total_timesteps))
out_train["traj_length"] = []
out_val = deepcopy(out_train) out_val = deepcopy(out_train)
for i in tqdm(range(len(traj_lengths))): for i in tqdm(range(len(traj_lengths))):
if i in train_indices: if i in train_indices:
@ -124,8 +114,8 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
out = out_val out = out_val
T = traj_lengths[i] T = traj_lengths[i]
obs_traj = obs[i].numpy() obs_traj = obs[i, :T].numpy()
action_traj = actions[i].numpy() action_traj = actions[i, :T].numpy()
# scale to [-1, 1] for both ob and action # scale to [-1, 1] for both ob and action
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1 obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
@ -133,20 +123,21 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1 2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
) )
# get episode length out["states"].append(obs_traj)
traj_length = T out["actions"].append(action_traj)
out["traj_length"].append(traj_length) out["rewards"].append(np.zeros(T)) # no reward from d3il dataset
out["terminals"].append(
np.zeros(T, dtype=bool)
) # no terminal from d3il dataset
out["traj_lengths"].append(T)
# extract # Concatenate trajectories
rewards = np.zeros(total_timesteps) # no reward from d3il dataset for key in ["states", "actions", "rewards"]:
data_traj = { out_train[key] = np.concatenate(out_train[key], axis=0)
"observations": obs_traj,
"actions": action_traj, # Only concatenate validation set if it exists
"rewards": rewards, if val_split > 0:
} out_val[key] = np.concatenate(out_val[key], axis=0)
for key in keys:
traj = data_traj[key]
out[key] = np.vstack((out[key], traj[None]))
# plot all trajectories and save in a figure # plot all trajectories and save in a figure
def plot(out, name): def plot(out, name):
@ -166,14 +157,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
pillar_xys = get_obj_xy_list() pillar_xys = get_obj_xy_list()
fig = plt.figure() fig = plt.figure()
all_trajs = out["observations"] # num x timestep x obs prev_index = 0
for traj, traj_length in zip(all_trajs, out["traj_length"]): for traj_length in out["traj_lengths"]:
traj = out["states"][prev_index : traj_length + prev_index].copy()
# unnormalize # unnormalize
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1] traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
traj = traj * (obs_max - obs_min) + obs_min traj = traj * (obs_max - obs_min) + obs_min
plt.plot( plt.plot(
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3) traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
) )
prev_index += traj_length
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-") plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
for xy in pillar_xys: for xy in pillar_xys:
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True) circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
@ -194,10 +187,20 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
# Save to np file # Save to np file
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz") save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz") save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
with open(save_train_path, "wb") as f: np.savez_compressed(
pickle.dump(out_train, f) save_train_path,
with open(save_val_path, "wb") as f: states=out_train["states"],
pickle.dump(out_val, f) actions=out_train["actions"],
rewards=out_train["rewards"],
traj_lengths=out_train["traj_lengths"],
)
np.savez_compressed(
save_val_path,
states=out_val["states"],
actions=out_val["actions"],
rewards=out_val["rewards"],
traj_lengths=out_val["traj_lengths"],
)
normalization_save_path = os.path.join( normalization_save_path = os.path.join(
save_dir, save_name_prefix + "normalization.npz" save_dir, save_name_prefix + "normalization.npz"
) )
@ -212,42 +215,42 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
# debug # debug
logger.info("\n========== Final ===========") logger.info("\n========== Final ===========")
logger.info( logger.info(
f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}" f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}"
) )
logger.info( logger.info(
f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}" f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}"
) )
logger.info( logger.info(
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}" f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}"
) )
logger.info( logger.info(
f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}" f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}"
) )
if val_split > 0: if val_split > 0:
logger.info( logger.info(
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}" f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}"
) )
logger.info( logger.info(
f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}" f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}"
) )
for obs_dim_ind in range(obs_dim): for obs_dim_ind in range(obs_dim):
obs = out_train["observations"][:, :, obs_dim_ind] obs = out_train["states"][:, obs_dim_ind]
logger.info( logger.info(
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
) )
for action_dim_ind in range(action_dim): for action_dim_ind in range(action_dim):
action = out_train["actions"][:, :, action_dim_ind] action = out_train["actions"][:, action_dim_ind]
logger.info( logger.info(
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
) )
if val_split > 0: if val_split > 0:
for obs_dim_ind in range(obs_dim): for obs_dim_ind in range(obs_dim):
obs = out_val["observations"][:, :, obs_dim_ind] obs = out_val["states"][:, obs_dim_ind]
logger.info( logger.info(
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}" f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
) )
for action_dim_ind in range(action_dim): for action_dim_ind in range(action_dim):
action = out_val["actions"][:, :, action_dim_ind] action = out_val["actions"][:, action_dim_ind]
logger.info( logger.info(
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}" f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
) )
@ -275,7 +278,7 @@ if __name__ == "__main__":
args.save_name_prefix args.save_name_prefix
+ f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log", + f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
) )
logger = logging.getLogger("get_D4RL_dataset") logger = logging.getLogger("process_d3il_dataset")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
file_handler = logging.FileHandler(log_path) file_handler = logging.FileHandler(log_path)
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler file_handler.setLevel(logging.INFO) # Set the minimum level for this handler

View File

@ -78,7 +78,6 @@ robomimic dataset normalizes action to [-1, 1], observation roughly? to [-1, 1].
""" """
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
import h5py import h5py
@ -127,10 +126,10 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize):
# Initialize variables for tracking trajectory statistics # Initialize variables for tracking trajectory statistics
traj_lengths = [] traj_lengths = []
obs_min = np.zeros((obs_dim)) obs_min = np.inf * np.ones((obs_dim))
obs_max = np.zeros((obs_dim)) obs_max = -np.inf * np.ones((obs_dim))
action_min = np.zeros((action_dim)) action_min = np.inf * np.ones((action_dim))
action_max = np.zeros((action_dim)) action_max = -np.inf * np.ones((action_dim))
# Process each demo # Process each demo
for ep in demos: for ep in demos:
@ -256,6 +255,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize):
logging.info( logging.info(
f"Val - Trajectories: {len(out_val['traj_lengths'])}, Transitions: {np.sum(out_val['traj_lengths'])}" f"Val - Trajectories: {len(out_val['traj_lengths'])}, Transitions: {np.sum(out_val['traj_lengths'])}"
) )
for obs_dim_ind in range(obs_dim):
obs = out_train["states"][:, obs_dim_ind]
logging.info(
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
)
for action_dim_ind in range(action_dim):
action = out_train["actions"][:, action_dim_ind]
logging.info(
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
)
if __name__ == "__main__": if __name__ == "__main__":