update D3IL pre-processing, fix normalization bug in robomimic pre-processing
This commit is contained in:
parent
0bdae945e9
commit
7d1b3a236f
@ -15,4 +15,10 @@ python download_datasets.py --tasks all --dataset_types mh --hdf5_types low_dim
|
|||||||
python download_datasets.py --tasks all --dataset_types mh --hdf5_types raw # pixel-based policy
|
python download_datasets.py --tasks all --dataset_types mh --hdf5_types raw # pixel-based policy
|
||||||
# for pixel, replay the trajectories to extract image observations
|
# for pixel, replay the trajectories to extract image observations
|
||||||
python robomimic/scripts/dataset_states_to_obs.py --done_mode 2 --dataset datasets/can/mh/demo_v141.hdf5 --output_name image_v141.hdf5 --camera_names robot0_eye_in_hand --camera_height 96 --camera_width 96 --exclude-next-obs --n 100
|
python robomimic/scripts/dataset_states_to_obs.py --done_mode 2 --dataset datasets/can/mh/demo_v141.hdf5 --output_name image_v141.hdf5 --camera_names robot0_eye_in_hand --camera_height 96 --camera_width 96 --exclude-next-obs --n 100
|
||||||
|
```
|
||||||
|
|
||||||
|
D3IL data: first download the raw data from [D3IL](https://github.com/ALRhub/d3il) (see the Google Drive link provided there), then run the pre-processing scripts below:
|
||||||
|
```console
|
||||||
|
python script/dataset/process_d3il_dataset.py --load_path=<avoid_data_path> --env_type=avoid # save all data
|
||||||
|
python script/dataset/filter_d3il_avoid_data.py --load_path=<avoid_data_path> --desired_modes ... --required_modes ... # filter modes
|
||||||
```
|
```
|
@ -7,7 +7,6 @@ Trajectories are normalized with filtered data, not the original data.
|
|||||||
import os
|
import os
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import pickle
|
|
||||||
import random
|
import random
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
@ -91,10 +90,8 @@ def make_dataset(
|
|||||||
full_traj_lengths = np.array(full_traj_lengths)
|
full_traj_lengths = np.array(full_traj_lengths)
|
||||||
|
|
||||||
# take the max and min of obs and action
|
# take the max and min of obs and action
|
||||||
obs_min = np.zeros((obs_dim))
|
obs_all = np.empty((0, obs_dim))
|
||||||
obs_max = np.zeros((obs_dim))
|
action_all = np.empty((0, action_dim))
|
||||||
action_min = np.zeros((action_dim))
|
|
||||||
action_max = np.zeros((action_dim))
|
|
||||||
chosen_indices = []
|
chosen_indices = []
|
||||||
for i in tqdm(range(len(masks))):
|
for i in tqdm(range(len(masks))):
|
||||||
T = full_traj_lengths[i]
|
T = full_traj_lengths[i]
|
||||||
@ -125,21 +122,18 @@ def make_dataset(
|
|||||||
continue
|
continue
|
||||||
chosen_indices.append(i)
|
chosen_indices.append(i)
|
||||||
|
|
||||||
obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0)
|
obs_all = np.vstack((obs_all, obs_traj))
|
||||||
obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0)
|
action_all = np.vstack((action_all, action_traj))
|
||||||
action_min = np.min(
|
obs_min = np.min(obs_all, axis=0)
|
||||||
np.vstack((action_min, np.min(action_traj, axis=0))), axis=0
|
obs_max = np.max(obs_all, axis=0)
|
||||||
)
|
action_min = np.min(action_all, axis=0)
|
||||||
action_max = np.max(
|
action_max = np.max(action_all, axis=0)
|
||||||
np.vstack((action_max, np.max(action_traj, axis=0))), axis=0
|
|
||||||
)
|
|
||||||
if len(chosen_indices) == 0:
|
if len(chosen_indices) == 0:
|
||||||
raise ValueError("No data found for the desired/required modes")
|
raise ValueError("No data found for the desired/required modes")
|
||||||
chosen_indices = np.array(chosen_indices)
|
chosen_indices = np.array(chosen_indices)
|
||||||
traj_lengths = full_traj_lengths[chosen_indices]
|
traj_lengths = full_traj_lengths[chosen_indices]
|
||||||
actions = demo_dataset.actions[chosen_indices]
|
actions = demo_dataset.actions[chosen_indices]
|
||||||
obs = demo_dataset.observations[chosen_indices]
|
obs = demo_dataset.observations[chosen_indices]
|
||||||
max_traj_length = np.max(traj_lengths)
|
|
||||||
|
|
||||||
# split indices in train and val
|
# split indices in train and val
|
||||||
num_traj = len(traj_lengths)
|
num_traj = len(traj_lengths)
|
||||||
@ -159,17 +153,13 @@ def make_dataset(
|
|||||||
logger.info(f"action max: {action_max}")
|
logger.info(f"action max: {action_max}")
|
||||||
|
|
||||||
# do over all indices
|
# do over all indices
|
||||||
out_train = {}
|
out_train = {
|
||||||
keys = [
|
"states": [],
|
||||||
"observations",
|
"actions": [],
|
||||||
"actions",
|
"rewards": [],
|
||||||
"rewards",
|
"terminals": [],
|
||||||
]
|
"traj_lengths": [],
|
||||||
total_timesteps = actions.shape[1]
|
}
|
||||||
out_train["observations"] = np.empty((0, total_timesteps, obs_dim))
|
|
||||||
out_train["actions"] = np.empty((0, total_timesteps, action_dim))
|
|
||||||
out_train["rewards"] = np.empty((0, total_timesteps))
|
|
||||||
out_train["traj_length"] = []
|
|
||||||
out_val = deepcopy(out_train)
|
out_val = deepcopy(out_train)
|
||||||
for i in tqdm(range(len(traj_lengths))):
|
for i in tqdm(range(len(traj_lengths))):
|
||||||
if i in train_indices:
|
if i in train_indices:
|
||||||
@ -177,8 +167,8 @@ def make_dataset(
|
|||||||
else:
|
else:
|
||||||
out = out_val
|
out = out_val
|
||||||
T = traj_lengths[i]
|
T = traj_lengths[i]
|
||||||
obs_traj = obs[i].numpy()
|
obs_traj = obs[i, :T].numpy()
|
||||||
action_traj = actions[i].numpy()
|
action_traj = actions[i, :T].numpy()
|
||||||
|
|
||||||
# scale to [-1, 1] for both ob and action
|
# scale to [-1, 1] for both ob and action
|
||||||
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
|
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
|
||||||
@ -186,20 +176,21 @@ def make_dataset(
|
|||||||
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
|
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
|
||||||
)
|
)
|
||||||
|
|
||||||
# get episode length
|
out["states"].append(obs_traj)
|
||||||
traj_length = T
|
out["actions"].append(action_traj)
|
||||||
out["traj_length"].append(traj_length)
|
out["rewards"].append(np.zeros(T)) # no reward from d3il dataset
|
||||||
|
out["terminals"].append(
|
||||||
|
np.zeros(T, dtype=bool)
|
||||||
|
) # no terminal from d3il dataset
|
||||||
|
out["traj_lengths"].append(T)
|
||||||
|
|
||||||
# extract
|
# Concatenate trajectories
|
||||||
rewards = np.zeros(total_timesteps) # no reward from d3il dataset
|
for key in ["states", "actions", "rewards"]:
|
||||||
data_traj = {
|
out_train[key] = np.concatenate(out_train[key], axis=0)
|
||||||
"observations": obs_traj,
|
|
||||||
"actions": action_traj,
|
# Only concatenate validation set if it exists
|
||||||
"rewards": rewards,
|
if val_split > 0:
|
||||||
}
|
out_val[key] = np.concatenate(out_val[key], axis=0)
|
||||||
for key in keys:
|
|
||||||
traj = data_traj[key]
|
|
||||||
out[key] = np.vstack((out[key], traj[None]))
|
|
||||||
|
|
||||||
# plot all trajectories and save in a figure
|
# plot all trajectories and save in a figure
|
||||||
def plot(out, name):
|
def plot(out, name):
|
||||||
@ -219,14 +210,16 @@ def make_dataset(
|
|||||||
|
|
||||||
pillar_xys = get_obj_xy_list()
|
pillar_xys = get_obj_xy_list()
|
||||||
fig = plt.figure()
|
fig = plt.figure()
|
||||||
all_trajs = out["observations"] # num x timestep x obs
|
prev_index = 0
|
||||||
for traj, traj_length in zip(all_trajs, out["traj_length"]):
|
for traj_length in out["traj_lengths"]:
|
||||||
|
traj = out["states"][prev_index : traj_length + prev_index].copy()
|
||||||
# unnormalize
|
# unnormalize
|
||||||
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
|
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
|
||||||
traj = traj * (obs_max - obs_min) + obs_min
|
traj = traj * (obs_max - obs_min) + obs_min
|
||||||
plt.plot(
|
plt.plot(
|
||||||
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
|
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
|
||||||
)
|
)
|
||||||
|
prev_index += traj_length
|
||||||
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
|
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
|
||||||
for xy in pillar_xys:
|
for xy in pillar_xys:
|
||||||
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
|
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
|
||||||
@ -247,10 +240,20 @@ def make_dataset(
|
|||||||
# Save to np file
|
# Save to np file
|
||||||
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
|
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
|
||||||
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
|
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
|
||||||
with open(save_train_path, "wb") as f:
|
np.savez_compressed(
|
||||||
pickle.dump(out_train, f)
|
save_train_path,
|
||||||
with open(save_val_path, "wb") as f:
|
states=np.array(out_train["states"]),
|
||||||
pickle.dump(out_val, f)
|
actions=np.array(out_train["actions"]),
|
||||||
|
rewards=np.array(out_train["rewards"]),
|
||||||
|
traj_lengths=np.array(out_train["traj_lengths"]),
|
||||||
|
)
|
||||||
|
np.savez_compressed(
|
||||||
|
save_val_path,
|
||||||
|
states=np.array(out_val["states"]),
|
||||||
|
actions=np.array(out_val["actions"]),
|
||||||
|
rewards=np.array(out_val["rewards"]),
|
||||||
|
traj_lengths=np.array(out_val["traj_lengths"]),
|
||||||
|
)
|
||||||
normalization_save_path = os.path.join(
|
normalization_save_path = os.path.join(
|
||||||
save_dir, save_name_prefix + "normalization.npz"
|
save_dir, save_name_prefix + "normalization.npz"
|
||||||
)
|
)
|
||||||
@ -265,42 +268,42 @@ def make_dataset(
|
|||||||
# debug
|
# debug
|
||||||
logger.info("\n========== Final ===========")
|
logger.info("\n========== Final ===========")
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}"
|
f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}"
|
f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}"
|
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}"
|
f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
if val_split > 0:
|
if val_split > 0:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}"
|
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}"
|
f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
for obs_dim_ind in range(obs_dim):
|
for obs_dim_ind in range(obs_dim):
|
||||||
obs = out_train["observations"][:, :, obs_dim_ind]
|
obs = out_train["states"][:, obs_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||||
)
|
)
|
||||||
for action_dim_ind in range(action_dim):
|
for action_dim_ind in range(action_dim):
|
||||||
action = out_train["actions"][:, :, action_dim_ind]
|
action = out_train["actions"][:, action_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||||
)
|
)
|
||||||
if val_split > 0:
|
if val_split > 0:
|
||||||
for obs_dim_ind in range(obs_dim):
|
for obs_dim_ind in range(obs_dim):
|
||||||
obs = out_val["observations"][:, :, obs_dim_ind]
|
obs = out_val["states"][:, obs_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||||
)
|
)
|
||||||
for action_dim_ind in range(action_dim):
|
for action_dim_ind in range(action_dim):
|
||||||
action = out_val["actions"][:, :, action_dim_ind]
|
action = out_val["actions"][:, action_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||||
)
|
)
|
||||||
@ -335,7 +338,7 @@ if __name__ == "__main__":
|
|||||||
args.save_name_prefix
|
args.save_name_prefix
|
||||||
+ f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
|
+ f"{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
|
||||||
)
|
)
|
||||||
logger = logging.getLogger("get_D4RL_dataset")
|
logger = logging.getLogger("filter_d3il_avoid_data")
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
file_handler = logging.FileHandler(log_path)
|
file_handler = logging.FileHandler(log_path)
|
||||||
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler
|
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler
|
||||||
|
@ -5,7 +5,6 @@ Process d3il dataset and save it into our custom format so it can be loaded for
|
|||||||
import os
|
import os
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import pickle
|
|
||||||
import random
|
import random
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
@ -68,7 +67,6 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
for ep in range(masks.shape[0]):
|
for ep in range(masks.shape[0]):
|
||||||
traj_lengths.append(int(masks[ep].sum().item()))
|
traj_lengths.append(int(masks[ep].sum().item()))
|
||||||
traj_lengths = np.array(traj_lengths)
|
traj_lengths = np.array(traj_lengths)
|
||||||
max_traj_length = np.max(traj_lengths)
|
|
||||||
|
|
||||||
# split indices in train and val
|
# split indices in train and val
|
||||||
num_traj = len(traj_lengths)
|
num_traj = len(traj_lengths)
|
||||||
@ -76,22 +74,18 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
train_indices = random.sample(range(num_traj), k=num_train)
|
train_indices = random.sample(range(num_traj), k=num_train)
|
||||||
|
|
||||||
# take the max and min of obs and action
|
# take the max and min of obs and action
|
||||||
obs_min = np.zeros((obs_dim))
|
obs_all = np.empty((0, obs_dim))
|
||||||
obs_max = np.zeros((obs_dim))
|
action_all = np.empty((0, action_dim))
|
||||||
action_min = np.zeros((action_dim))
|
|
||||||
action_max = np.zeros((action_dim))
|
|
||||||
for i in tqdm(range(len(traj_lengths))):
|
for i in tqdm(range(len(traj_lengths))):
|
||||||
T = traj_lengths[i]
|
T = traj_lengths[i]
|
||||||
obs_traj = obs[i, :T].numpy()
|
obs_traj = obs[i, :T].numpy()
|
||||||
action_traj = actions[i, :T].numpy()
|
action_traj = actions[i, :T].numpy()
|
||||||
obs_min = np.min(np.vstack((obs_min, np.min(obs_traj, axis=0))), axis=0)
|
obs_all = np.vstack((obs_all, obs_traj))
|
||||||
obs_max = np.max(np.vstack((obs_max, np.max(obs_traj, axis=0))), axis=0)
|
action_all = np.vstack((action_all, action_traj))
|
||||||
action_min = np.min(
|
obs_min = np.min(obs_all, axis=0)
|
||||||
np.vstack((action_min, np.min(action_traj, axis=0))), axis=0
|
obs_max = np.max(obs_all, axis=0)
|
||||||
)
|
action_min = np.min(action_all, axis=0)
|
||||||
action_max = np.max(
|
action_max = np.max(action_all, axis=0)
|
||||||
np.vstack((action_max, np.max(action_traj, axis=0))), axis=0
|
|
||||||
)
|
|
||||||
logger.info("\n========== Basic Info ===========")
|
logger.info("\n========== Basic Info ===========")
|
||||||
logger.info("total transitions: {}".format(np.sum(traj_lengths)))
|
logger.info("total transitions: {}".format(np.sum(traj_lengths)))
|
||||||
logger.info("total trajectories: {}".format(len(traj_lengths)))
|
logger.info("total trajectories: {}".format(len(traj_lengths)))
|
||||||
@ -105,17 +99,13 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
logger.info(f"action max: {action_max}")
|
logger.info(f"action max: {action_max}")
|
||||||
|
|
||||||
# do over all indices
|
# do over all indices
|
||||||
out_train = {}
|
out_train = {
|
||||||
keys = [
|
"states": [],
|
||||||
"observations",
|
"actions": [],
|
||||||
"actions",
|
"rewards": [],
|
||||||
"rewards",
|
"terminals": [],
|
||||||
]
|
"traj_lengths": [],
|
||||||
total_timesteps = actions.shape[1]
|
}
|
||||||
out_train["observations"] = np.empty((0, total_timesteps, obs_dim))
|
|
||||||
out_train["actions"] = np.empty((0, total_timesteps, action_dim))
|
|
||||||
out_train["rewards"] = np.empty((0, total_timesteps))
|
|
||||||
out_train["traj_length"] = []
|
|
||||||
out_val = deepcopy(out_train)
|
out_val = deepcopy(out_train)
|
||||||
for i in tqdm(range(len(traj_lengths))):
|
for i in tqdm(range(len(traj_lengths))):
|
||||||
if i in train_indices:
|
if i in train_indices:
|
||||||
@ -124,8 +114,8 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
out = out_val
|
out = out_val
|
||||||
|
|
||||||
T = traj_lengths[i]
|
T = traj_lengths[i]
|
||||||
obs_traj = obs[i].numpy()
|
obs_traj = obs[i, :T].numpy()
|
||||||
action_traj = actions[i].numpy()
|
action_traj = actions[i, :T].numpy()
|
||||||
|
|
||||||
# scale to [-1, 1] for both ob and action
|
# scale to [-1, 1] for both ob and action
|
||||||
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
|
obs_traj = 2 * (obs_traj - obs_min) / (obs_max - obs_min + 1e-6) - 1
|
||||||
@ -133,20 +123,21 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
|
2 * (action_traj - action_min) / (action_max - action_min + 1e-6) - 1
|
||||||
)
|
)
|
||||||
|
|
||||||
# get episode length
|
out["states"].append(obs_traj)
|
||||||
traj_length = T
|
out["actions"].append(action_traj)
|
||||||
out["traj_length"].append(traj_length)
|
out["rewards"].append(np.zeros(T)) # no reward from d3il dataset
|
||||||
|
out["terminals"].append(
|
||||||
|
np.zeros(T, dtype=bool)
|
||||||
|
) # no terminal from d3il dataset
|
||||||
|
out["traj_lengths"].append(T)
|
||||||
|
|
||||||
# extract
|
# Concatenate trajectories
|
||||||
rewards = np.zeros(total_timesteps) # no reward from d3il dataset
|
for key in ["states", "actions", "rewards"]:
|
||||||
data_traj = {
|
out_train[key] = np.concatenate(out_train[key], axis=0)
|
||||||
"observations": obs_traj,
|
|
||||||
"actions": action_traj,
|
# Only concatenate validation set if it exists
|
||||||
"rewards": rewards,
|
if val_split > 0:
|
||||||
}
|
out_val[key] = np.concatenate(out_val[key], axis=0)
|
||||||
for key in keys:
|
|
||||||
traj = data_traj[key]
|
|
||||||
out[key] = np.vstack((out[key], traj[None]))
|
|
||||||
|
|
||||||
# plot all trajectories and save in a figure
|
# plot all trajectories and save in a figure
|
||||||
def plot(out, name):
|
def plot(out, name):
|
||||||
@ -166,14 +157,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
|
|
||||||
pillar_xys = get_obj_xy_list()
|
pillar_xys = get_obj_xy_list()
|
||||||
fig = plt.figure()
|
fig = plt.figure()
|
||||||
all_trajs = out["observations"] # num x timestep x obs
|
prev_index = 0
|
||||||
for traj, traj_length in zip(all_trajs, out["traj_length"]):
|
for traj_length in out["traj_lengths"]:
|
||||||
|
traj = out["states"][prev_index : traj_length + prev_index].copy()
|
||||||
# unnormalize
|
# unnormalize
|
||||||
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
|
traj = (traj + 1) / 2 # [-1, 1] -> [0, 1]
|
||||||
traj = traj * (obs_max - obs_min) + obs_min
|
traj = traj * (obs_max - obs_min) + obs_min
|
||||||
plt.plot(
|
plt.plot(
|
||||||
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
|
traj[:traj_length, 2], traj[:traj_length, 3], color=(0.3, 0.3, 0.3)
|
||||||
)
|
)
|
||||||
|
prev_index += traj_length
|
||||||
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
|
plt.axhline(y=0.4, color=np.array([31, 119, 180]) / 255, linestyle="-")
|
||||||
for xy in pillar_xys:
|
for xy in pillar_xys:
|
||||||
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
|
circle = plt.Circle(xy, 0.01, color=(0.0, 0.0, 0.0), fill=True)
|
||||||
@ -194,10 +187,20 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
# Save to np file
|
# Save to np file
|
||||||
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
|
save_train_path = os.path.join(save_dir, save_name_prefix + "train.npz")
|
||||||
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
|
save_val_path = os.path.join(save_dir, save_name_prefix + "val.npz")
|
||||||
with open(save_train_path, "wb") as f:
|
np.savez_compressed(
|
||||||
pickle.dump(out_train, f)
|
save_train_path,
|
||||||
with open(save_val_path, "wb") as f:
|
states=out_train["states"],
|
||||||
pickle.dump(out_val, f)
|
actions=out_train["actions"],
|
||||||
|
rewards=out_train["rewards"],
|
||||||
|
traj_lengths=out_train["traj_lengths"],
|
||||||
|
)
|
||||||
|
np.savez_compressed(
|
||||||
|
save_val_path,
|
||||||
|
states=out_val["states"],
|
||||||
|
actions=out_val["actions"],
|
||||||
|
rewards=out_val["rewards"],
|
||||||
|
traj_lengths=out_val["traj_lengths"],
|
||||||
|
)
|
||||||
normalization_save_path = os.path.join(
|
normalization_save_path = os.path.join(
|
||||||
save_dir, save_name_prefix + "normalization.npz"
|
save_dir, save_name_prefix + "normalization.npz"
|
||||||
)
|
)
|
||||||
@ -212,42 +215,42 @@ def make_dataset(load_path, save_dir, save_name_prefix, env_type, val_split):
|
|||||||
# debug
|
# debug
|
||||||
logger.info("\n========== Final ===========")
|
logger.info("\n========== Final ===========")
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Number of episodes and transitions: {len(out_train['traj_length'])}, {np.sum(out_train['traj_length'])}"
|
f"Train - Number of episodes and transitions: {len(out_train['traj_lengths'])}, {np.sum(out_train['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Number of episodes and transitions: {len(out_val['traj_length'])}, {np.sum(out_val['traj_length'])}"
|
f"Val - Number of episodes and transitions: {len(out_val['traj_lengths'])}, {np.sum(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_length'])}, {np.std(out_train['traj_length'])}"
|
f"Train - Mean/Std trajectory length: {np.mean(out_train['traj_lengths'])}, {np.std(out_train['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Max/Min trajectory length: {np.max(out_train['traj_length'])}, {np.min(out_train['traj_length'])}"
|
f"Train - Max/Min trajectory length: {np.max(out_train['traj_lengths'])}, {np.min(out_train['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
if val_split > 0:
|
if val_split > 0:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_length'])}, {np.std(out_val['traj_length'])}"
|
f"Val - Mean/Std trajectory length: {np.mean(out_val['traj_lengths'])}, {np.std(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Max/Min trajectory length: {np.max(out_val['traj_length'])}, {np.min(out_val['traj_length'])}"
|
f"Val - Max/Min trajectory length: {np.max(out_val['traj_lengths'])}, {np.min(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
for obs_dim_ind in range(obs_dim):
|
for obs_dim_ind in range(obs_dim):
|
||||||
obs = out_train["observations"][:, :, obs_dim_ind]
|
obs = out_train["states"][:, obs_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||||
)
|
)
|
||||||
for action_dim_ind in range(action_dim):
|
for action_dim_ind in range(action_dim):
|
||||||
action = out_train["actions"][:, :, action_dim_ind]
|
action = out_train["actions"][:, action_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||||
)
|
)
|
||||||
if val_split > 0:
|
if val_split > 0:
|
||||||
for obs_dim_ind in range(obs_dim):
|
for obs_dim_ind in range(obs_dim):
|
||||||
obs = out_val["observations"][:, :, obs_dim_ind]
|
obs = out_val["states"][:, obs_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
f"Val - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||||
)
|
)
|
||||||
for action_dim_ind in range(action_dim):
|
for action_dim_ind in range(action_dim):
|
||||||
action = out_val["actions"][:, :, action_dim_ind]
|
action = out_val["actions"][:, action_dim_ind]
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
f"Val - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||||
)
|
)
|
||||||
@ -275,7 +278,7 @@ if __name__ == "__main__":
|
|||||||
args.save_name_prefix
|
args.save_name_prefix
|
||||||
+ f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
|
+ f"_{datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')}.log",
|
||||||
)
|
)
|
||||||
logger = logging.getLogger("get_D4RL_dataset")
|
logger = logging.getLogger("process_d3il_dataset")
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
file_handler = logging.FileHandler(log_path)
|
file_handler = logging.FileHandler(log_path)
|
||||||
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler
|
file_handler.setLevel(logging.INFO) # Set the minimum level for this handler
|
||||||
|
@ -78,7 +78,6 @@ robomimic dataset normalizes action to [-1, 1], observation roughly? to [-1, 1].
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import h5py
|
import h5py
|
||||||
@ -127,10 +126,10 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize):
|
|||||||
|
|
||||||
# Initialize variables for tracking trajectory statistics
|
# Initialize variables for tracking trajectory statistics
|
||||||
traj_lengths = []
|
traj_lengths = []
|
||||||
obs_min = np.zeros((obs_dim))
|
obs_min = np.inf * np.ones((obs_dim))
|
||||||
obs_max = np.zeros((obs_dim))
|
obs_max = -np.inf * np.ones((obs_dim))
|
||||||
action_min = np.zeros((action_dim))
|
action_min = np.inf * np.ones((action_dim))
|
||||||
action_max = np.zeros((action_dim))
|
action_max = -np.inf * np.ones((action_dim))
|
||||||
|
|
||||||
# Process each demo
|
# Process each demo
|
||||||
for ep in demos:
|
for ep in demos:
|
||||||
@ -256,6 +255,16 @@ def make_dataset(load_path, save_dir, save_name_prefix, val_split, normalize):
|
|||||||
logging.info(
|
logging.info(
|
||||||
f"Val - Trajectories: {len(out_val['traj_lengths'])}, Transitions: {np.sum(out_val['traj_lengths'])}"
|
f"Val - Trajectories: {len(out_val['traj_lengths'])}, Transitions: {np.sum(out_val['traj_lengths'])}"
|
||||||
)
|
)
|
||||||
|
for obs_dim_ind in range(obs_dim):
|
||||||
|
obs = out_train["states"][:, obs_dim_ind]
|
||||||
|
logging.info(
|
||||||
|
f"Train - Obs dim {obs_dim_ind+1} mean {np.mean(obs)} std {np.std(obs)} min {np.min(obs)} max {np.max(obs)}"
|
||||||
|
)
|
||||||
|
for action_dim_ind in range(action_dim):
|
||||||
|
action = out_train["actions"][:, action_dim_ind]
|
||||||
|
logging.info(
|
||||||
|
f"Train - Action dim {action_dim_ind+1} mean {np.mean(action)} std {np.std(action)} min {np.min(action)} max {np.max(action)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
Reference in New Issue
Block a user