dppo/env/gym_utils/__init__.py
Allen Z. Ren dc8e0c9edc
v0.6 (#18)
* Sampling over both env and denoising steps in DPPO updates (#13)

* sample one from each chain

* full random sampling

* Add Proficient Human (PH) Configs and Pipeline (#16)

* fix missing cfg

* add ph config

* fix how terminated flags are added to buffer in ibrl

* add ph config

* offline calql for 1M gradient updates

* bug fix: number of calql online gradient steps is the number of new transitions collected

* add sample config for DPPO with ta=1

* Sampling over both env and denoising steps in DPPO updates (#13)

* sample one from each chain

* full random sampling

* fix diffusion loss when predicting initial noise

* fix dppo inds

* fix typo

* remove print statement

---------

Co-authored-by: Justin M. Lidard <jlidard@neuronic.cs.princeton.edu>
Co-authored-by: allenzren <allen.ren@princeton.edu>

* update robomimic configs

* better calql formulation

* optimize calql and ibrl training

* optimize data transfer in ppo agents

* add kitchen configs

* re-organize config folders, rerun calql and rlpd

* add scratch gym locomotion configs

* add kitchen installation dependencies

* use truncated for termination in furniture env

* update furniture and gym configs

* update README and dependencies with kitchen

* add url for new data and checkpoints

* update demo RL configs

* update batch sizes for furniture unet configs

* raise error about dropout in residual mlp

* fix observation bug in bc loss

---------

Co-authored-by: Justin Lidard <60638575+jlidard@users.noreply.github.com>
Co-authored-by: Justin M. Lidard <jlidard@neuronic.cs.princeton.edu>
2024-10-30 19:58:06 -04:00

232 lines
7.7 KiB
Python

import os
import json
try:
from collections.abc import Iterable
except ImportError:
Iterable = (tuple, list)
def make_async(
    id,
    num_envs=1,
    asynchronous=True,
    wrappers=None,
    render=False,
    obs_dim=23,
    action_dim=7,
    env_type=None,
    max_episode_steps=None,
    # below for furniture only
    gpu_id=0,
    headless=True,
    record=False,
    normalization_path=None,
    furniture="one_leg",
    randomness="low",
    obs_steps=1,
    act_steps=8,
    sparse_reward=False,
    # below for robomimic only
    robomimic_env_cfg_path=None,
    use_image_obs=False,
    render_offscreen=False,
    reward_shaping=False,
    shape_meta=None,
    **kwargs,
):
    """Create a (vectorized) environment from its ID and configuration.

    Three back-ends are supported:

    * ``env_type == "furniture"``: a single ``FurnitureRLSimEnv`` (which takes
      ``num_envs`` itself) wrapped in ``FurnitureRLSimEnvMultiStepWrapper``.
    * ``robomimic_env_cfg_path`` given: ``num_envs`` robomimic environments
      created from the JSON metadata at that path.
    * otherwise: ``num_envs`` environments created with ``gym.envs.make``
      (D3IL "avoiding" if ``"avoiding"`` is in ``id``, else D4RL is imported).

    For the last two, the environments are wrapped with ``wrappers`` and put
    in an ``AsyncVectorEnv`` or ``SyncVectorEnv``.

    Parameters
    ----------
    id : str
        The environment ID. Passed to ``gym.envs.make`` on the gym path; it is
        also inspected for the substrings ``"avoiding"`` and ``"kitchen"``.
    num_envs : int
        Number of copies of the environment.
    asynchronous : bool
        If ``True``, use ``AsyncVectorEnv``; otherwise use ``SyncVectorEnv``.
        Ignored for furniture.
    wrappers : mapping, optional
        Maps a wrapper name (a key of ``wrapper_dict``) to the keyword
        arguments of that wrapper. Wrappers are applied in iteration order.
        Entries are also read with attribute access (e.g.
        ``wrappers.multi_step.n_obs_steps``), so a plain ``dict`` is not
        sufficient on the robomimic and dummy-env paths -- presumably an
        OmegaConf ``DictConfig``; verify against the caller.
        Required when ``robomimic_env_cfg_path`` is set.
    render : bool
        Forwarded to robomimic's ``create_env_from_metadata``, or passed as
        the ``render`` keyword to ``gym.envs.make`` unless ``"kitchen"`` is in
        ``id``.
    obs_dim, action_dim : int
        Sizes of the ``"state"`` observation and of the action in the dummy
        environment (see Notes).
    env_type : str, optional
        ``"furniture"`` selects the furniture back-end.
    max_episode_steps : int, optional
        Furniture only: passed as ``max_env_steps``.
    gpu_id, headless, record, furniture, randomness
        Furniture only: forwarded to ``FurnitureRLSimEnv``.
    normalization_path, obs_steps, act_steps, sparse_reward
        Furniture only: forwarded to ``FurnitureRLSimEnvMultiStepWrapper``.
    robomimic_env_cfg_path : str, optional
        Path to a JSON file with robomimic environment metadata.
    use_image_obs, render_offscreen : bool
        Robomimic only: forwarded to ``create_env_from_metadata``. If either
        is true, ``MUJOCO_GL`` is set to ``"egl"``.
    reward_shaping : bool
        Robomimic only: written into the loaded environment metadata.
    shape_meta : mapping, optional
        If given, ``shape_meta["obs"]`` maps observation keys (which must end
        with ``"rgb"`` or ``"state"``) to a mapping holding a ``"shape"``;
        used to build the dummy environment's observation space.
    **kwargs
        Extra keyword arguments for ``gym.envs.make`` (gym path only).

    Returns
    -------
    FurnitureRLSimEnvMultiStepWrapper, AsyncVectorEnv or SyncVectorEnv
        The created environment.

    Raises
    ------
    ValueError
        If ``wrappers`` is ``None`` where its entries are needed (robomimic
        environments, or the dummy environment).
    RuntimeError
        If a key of ``shape_meta["obs"]`` has an unsupported suffix (raised
        when the dummy environment is built).

    Notes
    -----
    When ``asynchronous`` is true and any of ``render``, ``render_offscreen``
    or ``use_image_obs`` is set, a dummy environment factory is handed to
    ``AsyncVectorEnv`` so that spaces and metadata can be obtained without
    instantiating a real environment in the main process.
    """
    if env_type == "furniture":
        from furniture_bench.envs.observation import DEFAULT_STATE_OBS
        from furniture_bench.envs.furniture_rl_sim_env import FurnitureRLSimEnv
        from env.gym_utils.wrapper.furniture import FurnitureRLSimEnvMultiStepWrapper

        furniture_env = FurnitureRLSimEnv(
            act_rot_repr="rot_6d",
            action_type="pos",
            april_tags=False,
            concat_robot_state=True,
            ctrl_mode="diffik",
            obs_keys=DEFAULT_STATE_OBS,
            furniture=furniture,
            gpu_id=gpu_id,
            headless=headless,
            num_envs=num_envs,
            observation_space="state",
            randomness=randomness,
            max_env_steps=max_episode_steps,
            record=record,
            pos_scalar=1,
            rot_scalar=1,
            stiffness=1_000,
            damping=200,
        )
        return FurnitureRLSimEnvMultiStepWrapper(
            furniture_env,
            n_obs_steps=obs_steps,
            n_action_steps=act_steps,
            prev_action=False,
            reset_within_step=False,
            pass_full_observations=False,
            normalization_path=normalization_path,
            sparse_reward=sparse_reward,
        )

    use_robomimic = robomimic_env_cfg_path is not None
    if use_robomimic and wrappers is None:
        # Fail early with a clear message: every robomimic env creation reads
        # its observation keys from `wrappers`.
        raise ValueError(
            "`wrappers` must be provided when `robomimic_env_cfg_path` is set "
            "(observation keys are read from `wrappers.robomimic_image` or "
            "`wrappers.robomimic_lowdim`)."
        )

    # Imported here rather than at module level to avoid import errors due to
    # incompatible gym versions (the furniture branch returns before this).
    from gym import spaces
    from env.gym_utils.async_vector_env import AsyncVectorEnv
    from env.gym_utils.sync_vector_env import SyncVectorEnv
    from env.gym_utils.wrapper import wrapper_dict

    # Import the env packages. The non-robomimic imports are done only for
    # their side effects (presumably gym env registration -- TODO confirm).
    if use_robomimic:
        import robomimic.utils.env_utils as EnvUtils
        import robomimic.utils.obs_utils as ObsUtils
    elif "avoiding" in id:
        import gym_avoiding  # noqa: F401
    else:
        import d4rl.gym_mujoco  # noqa: F401
    from gym.envs import make as make_

    def _make_env():
        """Create one environment and apply the configured wrappers."""
        if use_robomimic:
            # Tell robomimic which observation keys belong to which modality.
            if "robomimic_image" in wrappers:
                obs_modality_dict = {
                    "low_dim": wrappers.robomimic_image.low_dim_keys
                }
                image_keys = wrappers.robomimic_image.image_keys
                if image_keys is not None:
                    obs_modality_dict["rgb"] = image_keys
            else:
                obs_modality_dict = {
                    "low_dim": wrappers.robomimic_lowdim.low_dim_keys
                }
            ObsUtils.initialize_obs_modality_mapping_from_dict(obs_modality_dict)

            if render_offscreen or use_image_obs:
                os.environ["MUJOCO_GL"] = "egl"

            with open(robomimic_env_cfg_path, "r") as f:
                env_meta = json.load(f)
            env_meta["reward_shaping"] = reward_shaping
            env = EnvUtils.create_env_from_metadata(
                env_meta=env_meta,
                render=render,
                # only way to not show collision geometry is to enable
                # render_offscreen, which uses a lot of RAM.
                render_offscreen=render_offscreen,
                use_image_obs=use_image_obs,
                # render_gpu_device_id=0,
            )
            # Robosuite's hard reset causes excessive memory consumption.
            # Disabled to run more envs.
            # https://github.com/ARISE-Initiative/robosuite/blob/92abf5595eddb3a845cd1093703e5a3ccd01e77e/robosuite/environments/base.py#L247-L248
            env.env.hard_reset = False
        else:  # d3il, gym
            # Work on a copy so the shared closure `kwargs` is not mutated.
            make_kwargs = dict(kwargs)
            if "kitchen" not in id:  # d4rl kitchen does not support rendering!
                make_kwargs["render"] = render
            env = make_(id, **make_kwargs)

        # add wrappers
        if wrappers is not None:
            for wrapper_name, wrapper_args in wrappers.items():
                env = wrapper_dict[wrapper_name](env, **wrapper_args)
        return env

    def dummy_env_fn():
        """Build a fake env that only provides obs/action spaces and metadata.

        TODO(allenzren): does this dummy env allow camera obs for other envs
        besides robomimic?
        """
        import gym
        import numpy as np
        from env.gym_utils.wrapper.multi_step import MultiStep

        if wrappers is None:
            raise ValueError(
                "`wrappers.multi_step.n_obs_steps` is required to build the "
                "dummy env, but `wrappers` is None."
            )

        # Avoid importing or using env in the main process
        # to prevent OpenGL context issue with fork.
        # Create a fake env whose sole purpose is to provide
        # obs/action spaces and metadata.
        env = gym.Env()
        observation_space = spaces.Dict()
        if shape_meta is not None:  # rn only for images
            for key, value in shape_meta["obs"].items():
                # Both supported kinds of keys share the same [-1, 1] bounds.
                if not key.endswith(("rgb", "state")):
                    raise RuntimeError(f"Unsupported type {key}")
                observation_space[key] = spaces.Box(
                    low=-1,
                    high=1,
                    shape=value["shape"],
                    dtype=np.float32,
                )
        else:
            observation_space["state"] = gym.spaces.Box(
                -1,
                1,
                shape=(obs_dim,),
                dtype=np.float32,
            )
        env.observation_space = observation_space
        # float32, like the observation boxes above: an integer dtype would
        # restrict this [-1, 1] box to the values {-1, 0, 1}.
        env.action_space = gym.spaces.Box(
            -1, 1, shape=(action_dim,), dtype=np.float32
        )
        env.metadata = {
            "render.modes": ["human", "rgb_array", "depth_array"],
            "video.frames_per_second": 12,
        }
        return MultiStep(env=env, n_obs_steps=wrappers.multi_step.n_obs_steps)

    env_fns = [_make_env for _ in range(num_envs)]
    if not asynchronous:
        return SyncVectorEnv(env_fns)

    needs_dummy_env = render or render_offscreen or use_image_obs
    return AsyncVectorEnv(
        env_fns,
        dummy_env_fn=dummy_env_fn if needs_dummy_env else None,
        delay_init="avoiding" in id,  # add delay for D3IL initialization
    )