update D3IL pre-processing, fix normalization bug in robomimic pre-processing
This commit is contained in:
parent
0bdae945e9
commit
7d1b3a236f
@ -15,4 +15,10 @@ python download_datasets.py --tasks all --dataset_types mh --hdf5_types low_dim
|
||||
python download_datasets.py --tasks all --dataset_types mh --hdf5_types raw # pixel-based policy
|
||||
# for pixel, replay the trajectories to extract image observations
|
||||
python robomimic/scripts/dataset_states_to_obs.py --done_mode 2 --dataset datasets/can/mh/demo_v141.hdf5 --output_name image_v141.hdf5 --camera_names robot0_eye_in_hand --camera_height 96 --camera_width 96 --exclude-next-obs --n 100
|
||||
```
|
||||
|
||||
D3IL data: first download the raw data from [D3IL](https://github.com/ALRhub/d3il), see the Google Drive link
|
||||
```console
|
||||
python script/dataset/process_d3il_dataset.py --load_path=<avoid_data_path> --env_type=avoid # save all data
|
||||
python script/dataset/filter_d3il_avoid_data.py --load_path=<avoid_data_path> --desired_modes ... --required_modes ... # filter modes
|
||||
```
|
@ -7,7 +7,6 @@ Trajectories are normalized with filtered data, not the original data.
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import pickle
|
||||
import random
|
||||
import matplotlib.pyplot as plt
|
||||
from copy import deepcopy
|
||||
@ -91,10 +90,8 @@ def make_dataset(
|
||||
full_traj_lengths = np.array(full_traj_lengths)
|
||||
|
||||
# take the max and min of obs and action
|
||||
obs_min = np.zeros((obs_dim))
|
||||
obs_max = np.zeros((obs_dim))
|
||||
action_min = np.zeros((action_dim))
|
||||
action_max = np.zeros((action_dim))
|
||||
obs_all = np.empty((0, obs_dim))
|
||||
action_all = np.empty((0, action_dim))
|
||||
chosen_indices = []
|
||||
for i in tqdm(range(len(masks))):
|
||||
T = full_traj_lengths[i]
|
||||
@ -125,21 +122,18 @@ def make_dataset(
|
||||
continue
|
||||
chosen_indices.append(i)
|
||||
|
||||
obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0)
|
||||
obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0)
|
||||
action_min = np.min(
|
||||
np.vstack((action_min, np.min(action_traj, axis=0))), axis=0
|
||||
)
|
||||
action_max = np.max(
|
||||
np.vstack((action_max, np.max(action_traj, axis=0))), axis=0
|
||||
)
|
||||
obs_all = np.vstack((obs_all, obs_traj))
|
||||
action_all = np.vstack((action_all, action_traj))
|
||||
obs_min = np.min(obs_all, axis=0)
|
||||
obs_max = np.max(obs_all, axis=0)
|
||||
action_min = np.min(action_all, axis=0)
|
||||
action_max = np.max(action_all, axis=0)
|
||||
if len(chosen_indices) == 0:
|
||||
raise ValueError("No data found for the desired/required modes")
|
||||
chosen_indices = np.array(chosen_indices)
|
||||
traj_lengths = full_traj_lengths[chosen_indices]
|
||||
actions = demo_dataset.actions[chosen_indices]
|
||||
obs = demo_dataset.observations[chosen_indices]
|
||||
max_traj_length = np.max(traj_lengths)
|
||||
|
||||
# split indices in train and val
|
||||
num_traj = len(traj_lengths)
|
||||
@ -159,17 +153,13 @@ def make_dataset(
|
||||
logger.info(f"action max: {action_max}")
|
||||
|
||||
# do over all indices
|
||||
out_train = {}
|
||||
keys = [
|
||||
"observations",
|
||||
"actions",
|
||||
"rewards",
|
||||
]
|
||||
total_timesteps = actions.shape[1]
|
||||
out_train["observations"] = np.empty((0, total_timesteps, obs_dim))
|
||||
out_train["actions"] = np.empty((0, total_timesteps, action_dim))
|
||||
out_train["rewards"] = np.empty((0, total_timesteps))
|
||||
out_train["traj_length"] = []
|
||||
out_train = {
|
||||
"states": [],
|
||||
"actions": [],
|
||||
"rewards": [],
|
||||
"terminals": [],
|
||||
"traj_lengths": [],
|
||||
}
|
||||
out_val = deepcopy(out_train)
|
||||
for i in tqdm(range(len(traj_lengths))):
|
||||
if i in train_indices:
|
||||
@ -177,8 +167,8 @@ def make_dataset(
|
||||
else:
|
||||
out = out_val
|
||||
T = traj_lengths[i]
|
||||
obs_traj = obs[i].numpy()
|
||||
action_traj = actions[i].numpy()
|
||||
obs_traj = obs[i, :T].numpy()
|
||||
action_traj = actions[i, :T].numpy()
|
||||
|
||||
# scale to [-1, 1] for both ob and action
|
||||
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
|
||||
@ -186,20 +176,21 @@ def make_dataset(
|
||||
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
|
||||
)
|
||||
|
||||
# get episode length
|
||||
traj_length = T
|
||||
out["traj_length"].append(traj_length)
|
||||
out["states"].append(obs_traj)
|
||||
out["actions"].append(action_traj)
|
||||
out["rewards"].append(np.zeros(T)) # no reward from d3il dataset
|
||||
out["terminals"].append(
|
||||
np.zeros(T, dtype=bool)
|
||||
) # no terminal from d3il dataset
|
||||
out["traj_lengths"].append(T)
|
||||
|
||||
# extract
|
||||
rewards = np.zeros(total_timesteps) # no reward from d3il dataset
|
||||
data_traj = {
|
||||
"observations": obs_traj,
|
||||
"actions": action_traj,
|
||||
"rewards": rewards,
|
||||
}
|
||||
for key in keys:
|
||||
traj = data_traj[key]
|
||||
out[key] = np.vstack((out[key], traj[None]))
|
||||
# Concatenate trajectories
|
||||
for key in ["states", "actions", "rewards"]:
|
||||
out_train[key] = np.concatenate(out_train[key], axis=0)
|
||||
|
||||
# Only concatenate validation set if it exists
|
||||
if val_split > 0:
|
||||
out_val[key] = np.concatenate(out_val[key], axis=0)
|
||||
|
||||
# plot all trajectories and save in a figure
|
||||
def plot(out, name):
|
||||
@ -219,14 +210,16 @@ def make_dataset(
|
||||
|
||||
pillar_xys = get_obj_xy_list()
|
||||
fig = plt.figure()
|
||||
all_trajs = out["observations"] # num x timestep x obs
|
||||
for traj, traj_length in zip(all_trajs, out["traj_length"]):
|
||||
prev_index = 0
|
||||
for traj_length in out["traj_lengths"]:
|
||||
traj = out["states"][prev_index : traj_length + prev_index].copy()
|
||||
# unnormalize
|
||||
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
|
||||
traj = traj * (obs_max - obs_min) + obs_min
|
||||
plt.plot(
|
||||
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
|
||||
)
|
||||
prev_index += traj_length
|
||||
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
|
||||
for xy in pillar_xys:
|
||||
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
|
||||
@ -247,10 +240,20 @@ def make_dataset(
|
||||
# Save to np file
|
||||
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
|
||||
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
|
||||
with open(save_train_path, "wb") as f:
|
||||
pickle.dump(out_train, f)
|
||||
with open(save_val_path, "wb") as f:
|
||||
pickle.dump(out_val, f)
|
||||
np.savez_compressed(
|
||||
save_train_path,
|
||||
states=np.array(out_train["states"]),
|
||||
actions=np.array(out_train["actions"]),
|
||||
rewards=np.array(out_train["rewards"]),
|
||||
traj_lengths=np.array(out_train["traj_lengths"]),
|
||||
)
|
||||
np.savez_compressed(
|
||||
save_val_path,
|
||||
states=np.array(out_val["states"]),
|
||||
actions=np.array(out_val["actions"]),
|
||||
rewards=np.array(out_val["rewards"]),
|
||||
traj_lengths=np.array(out_val["traj_lengths"]),
|
||||
)
|
||||
normalization_save_path = os.path.join(
|
||||
save_dir, save_name_prefix + "normalization.npz"
|
||||
)
|
||||
@ -265,42 +268,42 @@ def make_dataset(
|
||||
# debug
|
||||
logger.info("\n========== Final ===========")
|
||||
logger.info(
|
||||
f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}"
|
||||
f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}"
|
||||
f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}"
|
||||
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}"
|
||||
f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}"
|
||||
)
|
||||
if val_split > 0:
|
||||
logger.info(
|
||||
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}"
|
||||
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}"
|
||||
f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}"
|
||||
)
|
||||
for obs_dim_ind in range(obs_dim):
|
||||
obs = out_train["observations"][:, :, obs_dim_ind]
|
||||
obs = out_train["states"][:, obs_dim_ind]
|
||||
logger.info(
|
||||
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||
)
|
||||
for action_dim_ind in range(action_dim):
|
||||
action = out_train["actions"][:, :, action_dim_ind]
|
||||
action = out_train["actions"][:, action_dim_ind]
|
||||
logger.info(
|
||||
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||
)
|
||||
if val_split > 0:
|
||||
for obs_dim_ind in range(obs_dim):
|
||||
obs = out_val["observations"][:, :, obs_dim_ind]
|
||||
obs = out_val["states"][:, obs_dim_ind]
|
||||
logger.info(
|
||||
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||
)
|
||||
for action_dim_ind in range(action_dim):
|
||||
action = out_val["actions"][:, :, action_dim_ind]
|
||||
action = out_val["actions"][:, action_dim_ind]
|
||||
logger.info(
|
||||
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||
)
|
||||
@ -335,7 +338,7 @@ if __name__ == "__main__":
|
||||
args.save_name_prefix
|
||||
+ f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
|
||||
)
|
||||
logger = logging.getLogger("get_D4RL_dataset")
|
||||
logger = logging.getLogger("filter_d3il_avoid_data")
|
||||
logger.setLevel(logging.INFO)
|
||||
file_handler = logging.FileHandler(log_path)
|
||||
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler
|
||||
|
@ -5,7 +5,6 @@ Process d3il dataset and save it into our custom format so it can be loaded for
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import pickle
|
||||
import random
|
||||
import matplotlib.pyplot as plt
|
||||
from copy import deepcopy
|
||||
@ -68,7 +67,6 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
for ep in range(masks.shape[0]):
|
||||
traj_lengths.append(int(masks[ep].sum().item()))
|
||||
traj_lengths = np.array(traj_lengths)
|
||||
max_traj_length = np.max(traj_lengths)
|
||||
|
||||
# split indices in train and val
|
||||
num_traj = len(traj_lengths)
|
||||
@ -76,22 +74,18 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
train_indices = random.sample(range(num_traj), k=num_train)
|
||||
|
||||
# take the max and min of obs and action
|
||||
obs_min = np.zeros((obs_dim))
|
||||
obs_max = np.zeros((obs_dim))
|
||||
action_min = np.zeros((action_dim))
|
||||
action_max = np.zeros((action_dim))
|
||||
obs_all = np.empty((0, obs_dim))
|
||||
action_all = np.empty((0, action_dim))
|
||||
for i in tqdm(range(len(traj_lengths))):
|
||||
T = traj_lengths[i]
|
||||
obs_traj = obs[i, :T].numpy()
|
||||
action_traj = actions[i, :T].numpy()
|
||||
obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0)
|
||||
obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0)
|
||||
action_min = np.min(
|
||||
np.vstack((action_min, np.min(action_traj, axis=0))), axis=0
|
||||
)
|
||||
action_max = np.max(
|
||||
np.vstack((action_max, np.max(action_traj, axis=0))), axis=0
|
||||
)
|
||||
obs_all = np.vstack((obs_all, obs_traj))
|
||||
action_all = np.vstack((action_all, action_traj))
|
||||
obs_min = np.min(obs_all, axis=0)
|
||||
obs_max = np.max(obs_all, axis=0)
|
||||
action_min = np.min(action_all, axis=0)
|
||||
action_max = np.max(action_all, axis=0)
|
||||
logger.info("\n========== Basic Info ===========")
|
||||
logger.info("total transitions: {}".format(np.sum(traj_lengths)))
|
||||
logger.info("total trajectories: {}".format(len(traj_lengths)))
|
||||
@ -105,17 +99,13 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
logger.info(f"action max: {action_max}")
|
||||
|
||||
# do over all indices
|
||||
out_train = {}
|
||||
keys = [
|
||||
"observations",
|
||||
"actions",
|
||||
"rewards",
|
||||
]
|
||||
total_timesteps = actions.shape[1]
|
||||
out_train["observations"] = np.empty((0, total_timesteps, obs_dim))
|
||||
out_train["actions"] = np.empty((0, total_timesteps, action_dim))
|
||||
out_train["rewards"] = np.empty((0, total_timesteps))
|
||||
out_train["traj_length"] = []
|
||||
out_train = {
|
||||
"states": [],
|
||||
"actions": [],
|
||||
"rewards": [],
|
||||
"terminals": [],
|
||||
"traj_lengths": [],
|
||||
}
|
||||
out_val = deepcopy(out_train)
|
||||
for i in tqdm(range(len(traj_lengths))):
|
||||
if i in train_indices:
|
||||
@ -124,8 +114,8 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
out = out_val
|
||||
|
||||
T = traj_lengths[i]
|
||||
obs_traj = obs[i].numpy()
|
||||
action_traj = actions[i].numpy()
|
||||
obs_traj = obs[i, :T].numpy()
|
||||
action_traj = actions[i, :T].numpy()
|
||||
|
||||
# scale to [-1, 1] for both ob and action
|
||||
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
|
||||
@ -133,20 +123,21 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
|
||||
)
|
||||
|
||||
# get episode length
|
||||
traj_length = T
|
||||
out["traj_length"].append(traj_length)
|
||||
out["states"].append(obs_traj)
|
||||
out["actions"].append(action_traj)
|
||||
out["rewards"].append(np.zeros(T)) # no reward from d3il dataset
|
||||
out["terminals"].append(
|
||||
np.zeros(T, dtype=bool)
|
||||
) # no terminal from d3il dataset
|
||||
out["traj_lengths"].append(T)
|
||||
|
||||
# extract
|
||||
rewards = np.zeros(total_timesteps) # no reward from d3il dataset
|
||||
data_traj = {
|
||||
"observations": obs_traj,
|
||||
"actions": action_traj,
|
||||
"rewards": rewards,
|
||||
}
|
||||
for key in keys:
|
||||
traj = data_traj[key]
|
||||
out[key] = np.vstack((out[key], traj[None]))
|
||||
# Concatenate trajectories
|
||||
for key in ["states", "actions", "rewards"]:
|
||||
out_train[key] = np.concatenate(out_train[key], axis=0)
|
||||
|
||||
# Only concatenate validation set if it exists
|
||||
if val_split > 0:
|
||||
out_val[key] = np.concatenate(out_val[key], axis=0)
|
||||
|
||||
# plot all trajectories and save in a figure
|
||||
def plot(out, name):
|
||||
@ -166,14 +157,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
|
||||
pillar_xys = get_obj_xy_list()
|
||||
fig = plt.figure()
|
||||
all_trajs = out["observations"] # num x timestep x obs
|
||||
for traj, traj_length in zip(all_trajs, out["traj_length"]):
|
||||
prev_index = 0
|
||||
for traj_length in out["traj_lengths"]:
|
||||
traj = out["states"][prev_index : traj_length + prev_index].copy()
|
||||
# unnormalize
|
||||
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
|
||||
traj = traj * (obs_max - obs_min) + obs_min
|
||||
plt.plot(
|
||||
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
|
||||
)
|
||||
prev_index += traj_length
|
||||
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
|
||||
for xy in pillar_xys:
|
||||
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
|
||||
@ -194,10 +187,20 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
# Save to np file
|
||||
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
|
||||
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
|
||||
with open(save_train_path, "wb") as f:
|
||||
pickle.dump(out_train, f)
|
||||
with open(save_val_path, "wb") as f:
|
||||
pickle.dump(out_val, f)
|
||||
np.savez_compressed(
|
||||
save_train_path,
|
||||
states=out_train["states"],
|
||||
actions=out_train["actions"],
|
||||
rewards=out_train["rewards"],
|
||||
traj_lengths=out_train["traj_lengths"],
|
||||
)
|
||||
np.savez_compressed(
|
||||
save_val_path,
|
||||
states=out_val["states"],
|
||||
actions=out_val["actions"],
|
||||
rewards=out_val["rewards"],
|
||||
traj_lengths=out_val["traj_lengths"],
|
||||
)
|
||||
normalization_save_path = os.path.join(
|
||||
save_dir, save_name_prefix + "normalization.npz"
|
||||
)
|
||||
@ -212,42 +215,42 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
||||
# debug
|
||||
logger.info("\n========== Final ===========")
|
||||
logger.info(
|
||||
f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}"
|
||||
f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}"
|
||||
f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}"
|
||||
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}"
|
||||
f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}"
|
||||
)
|
||||
if val_split > 0:
|
||||
logger.info(
|
||||
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}"
|
||||
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}"
|
||||
)
|
||||
logger.info(
|
||||
f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}"
|
||||
f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}"
|
||||
)
|
||||
for obs_dim_ind in range(obs_dim):
|
||||
obs = out_train["observations"][:, :, obs_dim_ind]
|
||||
obs = out_train["states"][:, obs_dim_ind]
|
||||
logger.info(
|
||||
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||
)
|
||||
for action_dim_ind in range(action_dim):
|
||||
action = out_train["actions"][:, :, action_dim_ind]
|
||||
action = out_train["actions"][:, action_dim_ind]
|
||||
logger.info(
|
||||
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||
)
|
||||
if val_split > 0:
|
||||
for obs_dim_ind in range(obs_dim):
|
||||
obs = out_val["observations"][:, :, obs_dim_ind]
|
||||
obs = out_val["states"][:, obs_dim_ind]
|
||||
logger.info(
|
||||
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||
)
|
||||
for action_dim_ind in range(action_dim):
|
||||
action = out_val["actions"][:, :, action_dim_ind]
|
||||
action = out_val["actions"][:, action_dim_ind]
|
||||
logger.info(
|
||||
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||
)
|
||||
@ -275,7 +278,7 @@ if __name__ == "__main__":
|
||||
args.save_name_prefix
|
||||
+ f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
|
||||
)
|
||||
logger = logging.getLogger("get_D4RL_dataset")
|
||||
logger = logging.getLogger("process_d3il_dataset")
|
||||
logger.setLevel(logging.INFO)
|
||||
file_handler = logging.FileHandler(log_path)
|
||||
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler
|
||||
|
@ -78,7 +78,6 @@ robomimic dataset normalizes action to [-1, 1], observation roughly? to [-1, 1].
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import h5py
|
||||
@ -127,10 +126,10 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize):
|
||||
|
||||
# Initialize variables for tracking trajectory statistics
|
||||
traj_lengths = []
|
||||
obs_min = np.zeros((obs_dim))
|
||||
obs_max = np.zeros((obs_dim))
|
||||
action_min = np.zeros((action_dim))
|
||||
action_max = np.zeros((action_dim))
|
||||
obs_min = np.inf * np.ones((obs_dim))
|
||||
obs_max = -np.inf * np.ones((obs_dim))
|
||||
action_min = np.inf * np.ones((action_dim))
|
||||
action_max = -np.inf * np.ones((action_dim))
|
||||
|
||||
# Process each demo
|
||||
for ep in demos:
|
||||
@ -256,6 +255,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize):
|
||||
logging.info(
|
||||
f"Val - Trajectories: {len(out_val['traj_lengths'])}, Transitions: {np.sum(out_val['traj_lengths'])}"
|
||||
)
|
||||
for obs_dim_ind in range(obs_dim):
|
||||
obs = out_train["states"][:, obs_dim_ind]
|
||||
logging.info(
|
||||
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||
)
|
||||
for action_dim_ind in range(action_dim):
|
||||
action = out_train["actions"][:, action_dim_ind]
|
||||
logging.info(
|
||||
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
Reference in New Issue
Block a user