commit last version of

Onur 2022-05-05 18:50:20 +02:00
parent a26b9f463b
commit 2cc1ab759c
11 changed files with 250 additions and 129 deletions

View File

@@ -391,8 +391,7 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": False,
"cup_goal_pos": [0.1, -2.0],
"learn_release_step": True
"cup_goal_pos": [0.1, -2.0]
}
)
@@ -404,8 +403,7 @@ register(
max_episode_steps=600,
kwargs={
"rndm_goal": True,
"cup_goal_pos": [-0.3, -1.2],
"learn_release_step": True
"cup_goal_pos": [-0.3, -1.2]
}
)
@@ -646,7 +644,7 @@ for _v in _versions:
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'num_dof': 7
'action_dim': 7
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
@@ -658,7 +656,9 @@ for _v in _versions:
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
# "p_gains": 0.125*np.array([200, 300, 100, 100, 10, 10, 2.5]),
"d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
# "d_gains": 0.025*np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
@@ -753,29 +753,92 @@ for _v in _versions:
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# ## HopperJump
# _versions = ["v0", "v1"]
# for _v in _versions:
# _env_id = f'ALRHopperJumpProMP-{_v}'
# register(
# id= _env_id,
# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
# kwargs={
# "name": f"alr_envs:ALRHopperJump-{_v}",
# "wrappers": [mujoco.hopper_jump.MPWrapper],
# "mp_kwargs": {
# "num_dof": 3,
# "num_basis": 5,
# "duration": 2,
# "post_traj_time": 0,
# "policy_type": "motor",
# "weights_scale": 1.0,
# "zero_start": True,
# "zero_goal": False,
# "policy_kwargs": {
# "p_gains": np.ones(3),
# "d_gains": 0.1*np.ones(3)
# }
# }
# }
# )
# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# ## HopperJump
# register(
# id= "ALRHopperJumpProMP-v2",
# entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
# kwargs={
# "name": f"alr_envs:ALRHopperJump-v2",
# "wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
# "mp_kwargs": {
# "num_dof": 3,
# "num_basis": 5,
# "duration": 2,
# "post_traj_time": 0,
# "policy_type": "motor",
# "weights_scale": 1.0,
# "zero_start": True,
# "zero_goal": False,
# "policy_kwargs": {
# "p_gains": np.ones(3),
# "d_gains": 0.1*np.ones(3)
# }
# }
# }
# )
# ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
## HopperJump
_versions = ["v0", "v1"]
for _v in _versions:
_env_id = f'ALRHopperJumpProMP-{_v}'
register(
id= _env_id,
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-{_v}",
"wrappers": [mujoco.hopper_jump.MPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
}
"wrappers": [mujoco.hopper_jump.NewMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'action_dim': 3
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
'tau': 2, # initial value
'learn_tau': False,
'learn_delay': False
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 2
}
}
)
@@ -784,25 +847,35 @@ for _v in _versions:
## HopperJump
register(
id= "ALRHopperJumpProMP-v2",
entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
kwargs={
"name": f"alr_envs:ALRHopperJump-v2",
"wrappers": [mujoco.hopper_jump.HighCtxtMPWrapper],
"mp_kwargs": {
"num_dof": 3,
"num_basis": 5,
"duration": 2,
"post_traj_time": 0,
"policy_type": "motor",
"weights_scale": 1.0,
"zero_start": True,
"zero_goal": False,
"policy_kwargs": {
"wrappers": [mujoco.hopper_jump.NewHighCtxtMPWrapper],
"ep_wrapper_kwargs": {
"weight_scale": 1
},
"movement_primitives_kwargs": {
'movement_primitives_type': 'promp',
'action_dim': 3
},
"phase_generator_kwargs": {
'phase_generator_type': 'linear',
'delay': 0,
'tau': 2, # initial value
'learn_tau': False,
'learn_delay': False
},
"controller_kwargs": {
'controller_type': 'motor',
"p_gains": np.ones(3),
"d_gains": 0.1*np.ones(3)
"d_gains": 0.1*np.ones(3),
},
"basis_generator_kwargs": {
'basis_generator_type': 'zero_rbf',
'num_basis': 5,
'num_basis_zero_start': 2
}
}
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append("ALRHopperJumpProMP-v2")
@@ -917,10 +990,3 @@ for _v in _versions:
}
)
ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
# --------------------- Testing new mp wrapper -----------------------------------------------------
# register(
# id='ALRReacherProMP-v0'
# )
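For reference, the ProMP variants registered above are created like any other gym environment once the package has been imported; a minimal usage sketch (it assumes that importing alr_envs executes the register() calls, as the alr_envs: prefix in the env names suggests):

import gym
import alr_envs  # noqa: F401  -- assumed to run the register() calls above on import

env = gym.make("ALRHopperJumpProMP-v0")
obs = env.reset()
# a single step of the episodic wrapper consumes one full ProMP parameter vector
action = env.action_space.sample()
obs, reward, done, info = env.step(action)
env.close()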

View File

@@ -1,4 +1,3 @@
from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
from .reacher.balancing import BalancingEnv
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
@@ -11,3 +10,4 @@ from .hopper_jump.hopper_jump_on_box import ALRHopperJumpOnBoxEnv
from .hopper_throw.hopper_throw import ALRHopperThrowEnv
from .hopper_throw.hopper_throw_in_basket import ALRHopperThrowInBasketEnv
from .walker_2d_jump.walker_2d_jump import ALRWalker2dJumpEnv
from .reacher.alr_reacher import ALRReacherEnv

View File

@@ -3,7 +3,6 @@ import os
import numpy as np
from gym import utils
from gym import spaces
from gym.envs.mujoco import MujocoEnv
from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
@@ -18,7 +17,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])
class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
rndm_goal=False, cup_goal_pos=None):
cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
if cup_goal_pos.shape[0]==2:
cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -43,7 +42,7 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
# self._release_step = 175 # time step of ball release
# self._release_step = 130 # time step of ball release
self._release_step = 100 # time step of ball release
self.release_step = 100 # time step of ball release
self.ep_length = 600 # based on 3 seconds with dt = 0.005 int(self.sim_time / self.dt)
self.cup_table_id = 10
@@ -52,7 +51,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
self.noise_std = 0.01
else:
self.noise_std = 0
self.learn_release_step = learn_release_step
reward_function = BeerPongReward
self.reward_function = reward_function()
@@ -63,13 +61,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
def start_pos(self):
return self._start_pos
def _set_action_space(self):
bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
low, high = bounds.T
self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
return self.action_space
@property
def start_vel(self):
return self._start_vel
@@ -109,21 +100,22 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
return self._get_obs()
def step(self, a):
self._release_step = a[-1] if self.learn_release_step else self._release_step
self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
if self.learn_release_step else self._release_step
# if a.shape[0] == 8: # we learn also when to release the ball
# self._release_step = a[-1]
# self._release_step = np.clip(self._release_step, 50, 250)
# self.release_step = 0.5/self.dt
reward_dist = 0.0
angular_vel = 0.0
applied_action = a[:a.shape[0]-int(self.learn_release_step)]
applied_action = a
reward_ctrl = - np.square(applied_action).sum()
if self.apply_gravity_comp:
applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
try:
self.do_simulation(applied_action, self.frame_skip)
if self._steps < self._release_step:
if self._steps < self.release_step:
self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
elif self._steps == self._release_step and self.add_noise:
elif self._steps == self.release_step and self.add_noise:
self.sim.data.qvel[7::] += self.noise_std * np.random.randn(3)
crash = False
except mujoco_py.builder.MujocoException:
@@ -160,7 +152,8 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
ball_pos=ball_pos,
ball_vel=ball_vel,
success=success,
is_collided=is_collided, sim_crash=crash)
is_collided=is_collided, sim_crash=crash,
table_contact_first=int(not self.reward_function.ball_ground_contact_first))
infos.update(reward_infos)
return ob, reward, done, infos
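A minimal sketch of driving the simplified environment directly; the module path and the zero action are assumptions for illustration, while the constructor arguments mirror the registration kwargs above:

import numpy as np
from alr_envs.alr.mujoco.beerpong.beerpong import ALRBeerBongEnv  # module path assumed from the repo layout

# a 2D cup goal gets the table height z = 0.840 inserted by the constructor
env = ALRBeerBongEnv(rndm_goal=True, cup_goal_pos=[-0.3, -1.2])
obs = env.reset()
obs, reward, done, info = env.step(np.zeros(env.action_space.shape))
# this commit adds a table_contact_first entry to the step info
print(info.get("table_contact_first"))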

View File

@@ -162,12 +162,16 @@ class BeerPongReward:
min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
1e-4 * np.mean(action_cost)
if env.learn_release_step and not self.ball_in_cup:
too_small = (env._release_step<50)*(env._release_step-50)**2
too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
reward = reward - too_small -too_big
# 1e-7*np.mean(action_cost)
# release step punishment
min_time_bound = 0.1
max_time_bound = 1.0
release_time = env.release_step*env.dt
release_time_rew = int(release_time<min_time_bound)*(-30-10*(release_time-min_time_bound)**2) \
+int(release_time>max_time_bound)*(-30-10*(release_time-max_time_bound)**2)
reward += release_time_rew
success = self.ball_in_cup
# print('release time :', release_time)
else:
reward = - 1e-2 * action_cost
# reward = - 1e-4 * action_cost
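The new release-step punishment reads as a flat -30 plus a quadratic term whenever the release time leaves the [0.1 s, 1.0 s] window; a standalone reconstruction of that term for clarity:

def release_time_penalty(release_time, min_time_bound=0.1, max_time_bound=1.0):
    """Zero inside the allowed window, otherwise -30 minus a quadratic distance term."""
    if release_time < min_time_bound:
        return -30 - 10 * (release_time - min_time_bound) ** 2
    if release_time > max_time_bound:
        return -30 - 10 * (release_time - max_time_bound) ** 2
    return 0.0

assert release_time_penalty(0.5) == 0.0
assert release_time_penalty(0.05) < -30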

View File

@@ -21,24 +21,35 @@ class NewMPWrapper(EpisodicWrapper):
[False] # env steps
])
def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
if self.env.learn_release_step:
return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
else:
return step_action
# def set_mp_action_space(self):
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
# if self.mp.learn_tau:
# min_action_bounds[0] = 20*self.env.dt
# max_action_bounds[0] = 260*self.env.dt
# mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
# dtype=np.float32)
# return mp_action_space
# def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
# if self.mp.learn_tau:
# return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
# else:
# return step_action
def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
if self.env.learn_release_step:
return action[:-1], action[-1] # mp_params, release step
if self.mp.learn_tau:
self.env.env.release_step = action[0]/self.env.dt # Tau value
# self.env.env.release_step = np.clip(action[0]/self.env.dt, 20, 260) # Tau value
return action, None
else:
return action, None
def set_action_space(self):
if self.env.learn_release_step:
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
return self.action_space
else:
return super(NewMPWrapper, self).set_action_space()
# def set_action_space(self):
# if self.mp.learn_tau:
# min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
# min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
# max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
# self.action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
# return self.action_space
# else:
# return super(NewMPWrapper, self).set_action_space()
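For intuition about the new _episode_callback: when the ProMP learns tau, the learned trajectory length in seconds is mapped to the simulation step at which the ball is released. A quick check of that arithmetic (dt = 0.005 s follows from the env's 600-step, 3-second episode):

dt = 0.005   # beer-pong control step (ep_length = 600 over 3 s)
tau = 0.5    # example learned trajectory length in seconds
release_step = tau / dt
assert release_step == 100  # matches the env's default release_step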

View File

@@ -1 +1,2 @@
from .mp_wrapper import MPWrapper, HighCtxtMPWrapper
from .new_mp_wrapper import NewMPWrapper, NewHighCtxtMPWrapper

View File

@@ -80,8 +80,6 @@ class ALRHopperJumpEnv(HopperEnv):
# overwrite reset_model to make it deterministic
def reset_model(self):
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
qpos = self.init_qpos # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
qvel = self.init_qvel # + self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
@@ -104,14 +102,26 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
def reset_model(self):
self._floor_geom_id = self.model.geom_name2id('floor')
self._foot_geom_id = self.model.geom_name2id('foot_geom')
noise_low = -self._reset_noise_scale
noise_high = self._reset_noise_scale
noise_low = -np.ones(self.model.nq)*self._reset_noise_scale
noise_low[1] = 0
noise_low[2] = -0.3
noise_low[3] = -0.1
noise_low[4] = -1.1
noise_low[5] = -0.785
noise_high = np.ones(self.model.nq)*self._reset_noise_scale
noise_high[1] = 0
noise_high[2] = 0.3
noise_high[3] = 0
noise_high[4] = 0
noise_high[5] = 0.785
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
# rnd_vec[2] *= 0.05 # the angle around the y axis shouldn't be too high as the agent then falls down quickly and
# can not recover
rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
# rnd_vec[1] = np.clip(rnd_vec[1], 0, 0.3)
qpos = self.init_qpos + rnd_vec
qvel = self.init_qvel #+ self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nv)
qvel = self.init_qvel
self.set_state(qpos, qvel)
@@ -167,16 +177,19 @@ if __name__ == '__main__':
env = ALRHopperJumpRndmPosEnv()
obs = env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
# if i % 10 == 0:
# env.render(mode=render_mode)
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
for k in range(10):
obs = env.reset()
print('observation :', obs[:6])
for i in range(200):
# objective.load_result("/tmp/cma")
# test with random actions
ac = env.action_space.sample()
obs, rew, d, info = env.step(ac)
# if i % 10 == 0:
# env.render(mode=render_mode)
env.render(mode=render_mode)
if d:
print('After ', i, ' steps, done: ', d)
env.reset()
env.close()
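The hand-picked reset-noise bounds above are easier to read with the joints named; an illustrative reconstruction, assuming the standard Hopper qpos layout [root x, root z, torso angle, thigh, leg, foot] (both that mapping and the noise scale are assumptions):

import numpy as np

scale = 0.005  # placeholder for self._reset_noise_scale
noise_low = -np.ones(6) * scale
noise_high = np.ones(6) * scale
noise_low[1:] = [0.0, -0.3, -0.1, -1.1, -0.785]  # no noise on the height, wide ranges on the joint angles
noise_high[1:] = [0.0, 0.3, 0.0, 0.0, 0.785]
rnd_vec = np.random.default_rng().uniform(low=noise_low, high=noise_high)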

View File

@@ -0,0 +1,29 @@
from alr_envs.mp.episodic_wrapper import EpisodicWrapper
from typing import Union, Tuple
import numpy as np
class NewMPWrapper(EpisodicWrapper):
@property
def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qpos[3:6].copy()
@property
def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
return self.env.sim.data.qvel[3:6].copy()
def set_active_obs(self):
return np.hstack([
[False] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
[False] * 6, # velocity
[True]
])
class NewHighCtxtMPWrapper(NewMPWrapper):
def set_active_obs(self):
return np.hstack([
[True] * (5 + int(not self.env.exclude_current_positions_from_observation)), # position
[False] * 6, # velocity
[False]
])
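The boolean vector returned by set_active_obs is meant to be applied as a mask to the flat observation, so that only the True entries are exposed as context to the movement primitive; a small sketch of that selection (the 12-dimensional observation assumes positions are excluded, as in the default hopper setup):

import numpy as np

obs = np.arange(12.0)  # illustrative observation of matching length
mask = np.hstack([[False] * 5, [False] * 6, [True]])
context = obs[mask]    # NewMPWrapper exposes only the final observation entry
print(context)         # -> [11.]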

View File

@@ -68,35 +68,35 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
def viewer_setup(self):
self.viewer.cam.trackbodyid = 0
# def reset_model(self):
# qpos = self.init_qpos
# if not hasattr(self, "goal"):
# self.goal = np.array([-0.25, 0.25])
# # self.goal = self.init_qpos.copy()[:2] + 0.05
# qpos[-2:] = self.goal
# qvel = self.init_qvel
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()
def reset_model(self):
qpos = self.init_qpos.copy()
while True:
self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
# self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
if np.linalg.norm(self.goal) < self.n_links / 10:
break
qpos = self.init_qpos
if not hasattr(self, "goal"):
self.goal = np.array([-0.25, 0.25])
# self.goal = self.init_qpos.copy()[:2] + 0.05
qpos[-2:] = self.goal
qvel = self.init_qvel.copy()
qvel = self.init_qvel
qvel[-2:] = 0
self.set_state(qpos, qvel)
self._steps = 0
return self._get_obs()
# def reset_model(self):
# qpos = self.init_qpos.copy()
# while True:
# self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
# # self.goal = self.np_random.uniform(low=0, high=self.n_links / 10, size=2)
# # self.goal = np.random.uniform(low=[-self.n_links / 10, 0], high=[0, self.n_links / 10], size=2)
# if np.linalg.norm(self.goal) < self.n_links / 10:
# break
# qpos[-2:] = self.goal
# qvel = self.init_qvel.copy()
# qvel[-2:] = 0
# self.set_state(qpos, qvel)
# self._steps = 0
#
# return self._get_obs()
# def reset_model(self):
# qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
# while True:

View File

@@ -50,13 +50,10 @@ class EpisodicWrapper(gym.Env, ABC):
# rendering
self.render_mode = render_mode
self.render_kwargs = {}
# self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
self.time_steps = np.linspace(0, self.duration, self.traj_steps)
self.mp.set_mp_times(self.time_steps)
# action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
min_action_bounds, max_action_bounds = mp.get_param_bounds()
self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
self.mp_action_space = self.set_mp_action_space()
self.action_space = self.set_action_space()
self.active_obs = self.set_active_obs()
@@ -65,7 +62,7 @@ class EpisodicWrapper(gym.Env, ABC):
dtype=self.env.observation_space.dtype)
def get_trajectory(self, action: np.ndarray) -> Tuple:
# TODO: this follows the implementation of the mp_pytorch library which includes the paramters tau and delay at
# TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay at
# the beginning of the array.
ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
scaled_mp_params = action.copy()
@@ -84,6 +81,13 @@ class EpisodicWrapper(gym.Env, ABC):
return trajectory, velocity
def set_mp_action_space(self):
"""This function can be used to set up an individual space for the parameters of the mp."""
min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
dtype=np.float32)
return mp_action_space
def set_action_space(self):
"""
This function can be used to modify the action space for considering actions which are not learned via motion
@@ -179,6 +183,7 @@ class EpisodicWrapper(gym.Env, ABC):
step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
step_action = self._step_callback(t, env_spec_params, step_action) # include possible callback info
c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
# print('step/clipped action ratio: ', step_action/c_action)
obs, c_reward, done, info = self.env.step(c_action)
if self.verbose >= 2:
actions[t, :] = c_action
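Pulling the construction of the MP parameter space into its own set_mp_action_space hook lets subclasses tighten the bounds of individual parameters; a minimal sketch of such an override, mirroring the commented-out variant in the beer-pong wrapper (the 20- and 260-step tau bounds are taken from that comment and are purely illustrative; the other hooks the wrapper requires are omitted here):

import gym
import numpy as np
from alr_envs.mp.episodic_wrapper import EpisodicWrapper

class BoundedTauWrapper(EpisodicWrapper):  # hypothetical subclass, for illustration only
    def set_mp_action_space(self):
        min_bounds, max_bounds = self.mp.get_param_bounds()
        min_bounds, max_bounds = min_bounds.numpy(), max_bounds.numpy()
        if self.mp.learn_tau:
            # restrict the learned trajectory length to 20..260 environment steps
            min_bounds[0] = 20 * self.env.dt
            max_bounds[0] = 260 * self.env.dt
        return gym.spaces.Box(low=min_bounds, high=max_bounds, dtype=np.float32)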

View File

@@ -156,12 +156,11 @@ def make_mp_from_kwargs(
ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
if phase_kwargs.get('tau', None) is None:
phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
action_dim = mp_kwargs.pop('num_dof', None)
action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
mp_kwargs['action_dim'] = mp_kwargs.get('action_dim', np.prod(dummy_env.action_space.shape).item())
phase_gen = get_phase_generator(**phase_kwargs)
basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
controller = get_controller(**controller_kwargs)
mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
mp = get_movement_primitive(basis_generator=basis_gen, **mp_kwargs)
_env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
return _env
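The helper no longer takes a top-level num_dof; the dimensionality now lives inside the movement-primitive kwargs as action_dim and, when omitted, falls back to the flat size of the wrapped env's action space. A small self-contained sketch of that default:

import numpy as np

def resolve_action_dim(mp_kwargs, action_space_shape):
    """Mirrors the new default used in make_mp_from_kwargs."""
    return mp_kwargs.get('action_dim', int(np.prod(action_space_shape)))

assert resolve_action_dim({'movement_primitives_type': 'promp', 'action_dim': 7}, (7,)) == 7
assert resolve_action_dim({'movement_primitives_type': 'promp'}, (3,)) == 3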