removed legacy wrappers

parent 14c60766c2
commit 528b7521b6
@@ -1,88 +0,0 @@
from alr_envs.utils.policies import get_policy_class
from mp_lib import det_promp
import numpy as np
import gym


class DetPMPEnvWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 num_dof,
                 num_basis,
                 width,
                 off=0.01,
                 start_pos=None,
                 duration=1,
                 dt=0.01,
                 post_traj_time=0.,
                 policy_type=None,
                 weights_scale=1,
                 zero_start=False,
                 zero_goal=False,
                 ):
        super(DetPMPEnvWrapper, self).__init__(env)
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.dim = num_dof * num_basis
        self.pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
                                                zero_start=zero_start, zero_goal=zero_goal)
        weights = np.zeros(shape=(num_basis, num_dof))
        self.pmp.set_weights(duration, weights)
        self.weights_scale = weights_scale

        self.duration = duration
        self.dt = dt
        self.post_traj_steps = int(post_traj_time / dt)

        self.start_pos = start_pos
        self.zero_start = zero_start

        policy_class = get_policy_class(policy_type)
        self.policy = policy_class(env)

    def __call__(self, params, contexts=None):
        params = np.atleast_2d(params)
        rewards = []
        infos = []
        for p, c in zip(params, contexts):
            reward, info = self.rollout(p, c)
            rewards.append(reward)
            infos.append(info)

        return np.array(rewards), infos

    def rollout(self, params, context=None, render=False):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
        params = np.reshape(params, newshape=(self.num_basis, self.num_dof)) * self.weights_scale
        self.pmp.set_weights(self.duration, params)
        t, des_pos, des_vel, des_acc = self.pmp.compute_trajectory(1 / self.dt, 1.)
        if self.zero_start:
            des_pos += self.start_pos[None, :]

        if self.post_traj_steps > 0:
            des_pos = np.vstack([des_pos, np.tile(des_pos[-1, :], [self.post_traj_steps, 1])])
            des_vel = np.vstack([des_vel, np.zeros(shape=(self.post_traj_steps, self.num_dof))])

        self._trajectory = des_pos
        self._velocity = des_vel

        rews = []
        infos = []

        self.env.configure(context)
        self.env.reset()

        for t, pos_vel in enumerate(zip(des_pos, des_vel)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
            obs, rew, done, info = self.env.step(ac)
            rews.append(rew)
            infos.append(info)
            if render:
                self.env.render(mode="human")
            if done:
                break

        reward = np.sum(rews)

        return reward, info
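For context, a minimal sketch of how this removed DetPMPEnvWrapper was typically driven. The import path of the wrapper is an assumption, and the HoleReacherEnv settings are taken from make_holereacher_env_pmp at the bottom of this diff; the sketch is illustrative only and will not run against the package after this commit.

import numpy as np
import alr_envs.classic_control.hole_reacher as hr
# Assumed legacy import path; the module is deleted by this commit.
from alr_envs.utils.legacy.detpmp_env_wrapper import DetPMPEnvWrapper

# Step-based environment, configured as in make_holereacher_env_pmp below.
base_env = hr.HoleReacherEnv(n_links=5, allow_self_collision=False, allow_wall_collision=False,
                             hole_width=0.15, hole_depth=1, hole_x=1, collision_penalty=1000)

# The wrapper turns a flat weight vector into a full trajectory and executes it step by step.
env = DetPMPEnvWrapper(base_env, num_dof=5, num_basis=5, width=0.02, duration=2, dt=base_env.dt,
                       start_pos=base_env._start_pos, policy_type="velocity",
                       weights_scale=0.2, zero_start=True)

params = np.random.randn(env.dim)  # num_basis * num_dof = 25 weights
reward, info = env.rollout(params, render=False)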
@@ -1,173 +0,0 @@
import gym
from gym.error import (AlreadyPendingCallError, NoAsyncCallError)
from gym.vector.utils import concatenate, create_empty_array
from gym.vector.async_vector_env import AsyncState
import numpy as np
import multiprocessing as mp
import sys


def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
    assert shared_memory is None
    env = env_fn()
    parent_pipe.close()
    try:
        while True:
            command, data = pipe.recv()
            if command == 'reset':
                observation = env.reset()
                pipe.send((observation, True))
            elif command == 'step':
                observation, reward, done, info = env.step(data)
                if done:
                    observation = env.reset()
                pipe.send(((observation, reward, done, info), True))
            elif command == 'rollout':
                rewards = []
                infos = []
                for p, c in zip(*data):
                    reward, info = env.rollout(p, c)
                    rewards.append(reward)
                    infos.append(info)
                pipe.send(((rewards, infos), (True, ) * len(rewards)))
            elif command == 'seed':
                env.seed(data)
                pipe.send((None, True))
            elif command == 'close':
                env.close()
                pipe.send((None, True))
                break
            elif command == 'idle':
                pipe.send((None, True))
            elif command == '_check_observation_space':
                pipe.send((data == env.observation_space, True))
            else:
                raise RuntimeError('Received unknown command `{0}`. Must '
                                   'be one of {`reset`, `step`, `seed`, `close`, '
                                   '`_check_observation_space`}.'.format(command))
    except (KeyboardInterrupt, Exception):
        error_queue.put((index,) + sys.exc_info()[:2])
        pipe.send((None, False))
    finally:
        env.close()


class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
    def __init__(self, env_fns, n_samples, observation_space=None, action_space=None,
                 shared_memory=False, copy=True, context="spawn", daemon=True, worker=_worker):
        super(DmpAsyncVectorEnv, self).__init__(env_fns,
                                                observation_space=observation_space,
                                                action_space=action_space,
                                                shared_memory=shared_memory,
                                                copy=copy,
                                                context=context,
                                                daemon=daemon,
                                                worker=worker)

        # we need to overwrite the number of samples as we may sample more than num_envs
        self.observations = create_empty_array(self.single_observation_space,
                                               n=n_samples,
                                               fn=np.zeros)

    def __call__(self, params, contexts=None):
        return self.rollout(params, contexts)

    def rollout_async(self, params, contexts):
        """
        Parameters
        ----------
        params : iterable of samples from `action_space`
            List of actions.
        """
        self._assert_is_running()
        if self._state != AsyncState.DEFAULT:
            raise AlreadyPendingCallError('Calling `rollout_async` while waiting '
                                          'for a pending call to `{0}` to complete.'.format(
                                              self._state.value), self._state.value)

        params = np.atleast_2d(params)
        split_params = np.array_split(params, np.minimum(len(params), self.num_envs))
        if contexts is None:
            split_contexts = np.array_split([None, ] * len(params), np.minimum(len(params), self.num_envs))
        else:
            split_contexts = np.array_split(contexts, np.minimum(len(contexts), self.num_envs))

        assert np.all([len(p) == len(c) for p, c in zip(split_params, split_contexts)])
        for pipe, param, context in zip(self.parent_pipes, split_params, split_contexts):
            pipe.send(('rollout', (param, context)))
        for pipe in self.parent_pipes[len(split_params):]:
            pipe.send(('idle', None))
        self._state = AsyncState.WAITING_ROLLOUT

    def rollout_wait(self, timeout=None):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `step_wait` times out. If
            `None`, the call to `step_wait` never times out.

        Returns
        -------
        observations : sample from `observation_space`
            A batch of observations from the vectorized environment.

        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of rewards from the vectorized environment.

        dones : `np.ndarray` instance (dtype `np.bool_`)
            A vector whose entries indicate whether the episode has ended.

        infos : list of dict
            A list of auxiliary diagnostic information.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_ROLLOUT:
            raise NoAsyncCallError('Calling `rollout_wait` without any prior call '
                                   'to `rollout_async`.', AsyncState.WAITING_ROLLOUT.value)

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError('The call to `rollout_wait` has timed out after '
                                  '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        results = [r for r in results if r is not None]
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        rewards, infos = [_flatten_list(r) for r in zip(*results)]

        # for now, we ignore the observations and only return the rewards

        # if not self.shared_memory:
        #     self.observations = concatenate(observations_list, self.observations,
        #                                     self.single_observation_space)

        # return (deepcopy(self.observations) if self.copy else self.observations,
        #         np.array(rewards), np.array(dones, dtype=np.bool_), infos)

        return np.array(rewards), infos

    def rollout(self, actions, contexts):
        self.rollout_async(actions, contexts)
        return self.rollout_wait()


def _flatten_obs(obs):
    assert isinstance(obs, (list, tuple))
    assert len(obs) > 0

    if isinstance(obs[0], dict):
        keys = obs[0].keys()
        return {k: np.stack([o[k] for o in obs]) for k in keys}
    else:
        return np.stack(obs)


def _flatten_list(l):
    assert isinstance(l, (list, tuple))
    assert len(l) > 0
    assert all([len(l_) > 0 for l_ in l])

    return [l__ for l_ in l for l__ in l_]
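A minimal sketch of how this DmpAsyncVectorEnv was used for batched rollouts; it mirrors the commented-out usage in the removed example scripts further down. The import path of DmpAsyncVectorEnv is an assumption, make_holereacher_env comes from the legacy utils at the end of this diff, and none of this runs after the removal.

import numpy as np

from alr_envs.utils.legacy.utils import make_holereacher_env
# Assumed import path for the vectorized env above; the module is deleted by this commit.
from alr_envs.utils.legacy.dmp_async_vec_env import DmpAsyncVectorEnv

if __name__ == "__main__":
    n_cpus = 4
    n_samples = 10

    # One environment factory per worker; each worker answers the 'rollout' command above.
    vec_env = DmpAsyncVectorEnv([make_holereacher_env(i) for i in range(n_cpus)],
                                n_samples=n_samples)

    # 5 * 5 basis weights plus 5 learned goal positions per sample for the 5-link hole reacher.
    params = np.random.randn(n_samples, 30)

    # __call__ forwards to rollout(), which splits the samples across workers
    # and returns one summed reward and one info dict per sample.
    rewards, infos = vec_env(params)
    print(rewards)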
@@ -1,125 +0,0 @@
from alr_envs.utils.policies import get_policy_class
from mp_lib.phase import ExpDecayPhaseGenerator
from mp_lib.basis import DMPBasisGenerator
from mp_lib import dmps
import numpy as np
import gym


class DmpEnvWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 num_dof,
                 num_basis,
                 start_pos=None,
                 final_pos=None,
                 duration=1,
                 dt=0.01,
                 alpha_phase=2,
                 bandwidth_factor=3,
                 learn_goal=False,
                 post_traj_time=0.,
                 policy_type=None,
                 weights_scale=1.,
                 goal_scale=1.,
                 ):
        super(DmpEnvWrapper, self).__init__(env)
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.dim = num_dof * num_basis
        if learn_goal:
            self.dim += num_dof
        self.learn_goal = learn_goal
        self.duration = duration  # seconds
        time_steps = int(duration / dt)
        self.t = np.linspace(0, duration, time_steps)
        self.post_traj_steps = int(post_traj_time / dt)

        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
        basis_generator = DMPBasisGenerator(phase_generator,
                                            duration=duration,
                                            num_basis=self.num_basis,
                                            basis_bandwidth_factor=bandwidth_factor)

        self.dmp = dmps.DMP(num_dof=num_dof,
                            basis_generator=basis_generator,
                            phase_generator=phase_generator,
                            num_time_steps=time_steps,
                            dt=dt
                            )

        self.dmp.dmp_start_pos = start_pos.reshape((1, num_dof))

        dmp_weights = np.zeros((num_basis, num_dof))
        if learn_goal:
            dmp_goal_pos = np.zeros(num_dof)
        else:
            dmp_goal_pos = final_pos

        self.dmp.set_weights(dmp_weights, dmp_goal_pos)
        self.weights_scale = weights_scale
        self.goal_scale = goal_scale

        policy_class = get_policy_class(policy_type)
        self.policy = policy_class(env)

    def __call__(self, params, contexts=None):
        params = np.atleast_2d(params)
        rewards = []
        infos = []
        for p, c in zip(params, contexts):
            reward, info = self.rollout(p, c)
            rewards.append(reward)
            infos.append(info)

        return np.array(rewards), infos

    def goal_and_weights(self, params):
        if len(params.shape) > 1:
            assert params.shape[1] == self.dim
        else:
            assert len(params) == self.dim
            params = np.reshape(params, [1, self.dim])

        if self.learn_goal:
            goal_pos = params[0, -self.num_dof:]
            weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
        else:
            goal_pos = self.dmp.dmp_goal_pos.flatten()
            assert goal_pos is not None
            weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])

        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale

    def rollout(self, params, context=None, render=False):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
        goal_pos, weight_matrix = self.goal_and_weights(params)
        self.dmp.set_weights(weight_matrix, goal_pos)
        trajectory, velocity = self.dmp.reference_trajectory(self.t)

        if self.post_traj_steps > 0:
            trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.num_dof))])

        self._trajectory = trajectory
        self._velocity = velocity

        rews = []
        infos = []

        self.env.configure(context)
        self.env.reset()

        for t, pos_vel in enumerate(zip(trajectory, velocity)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
            obs, rew, done, info = self.env.step(ac)
            rews.append(rew)
            infos.append(info)
            if render:
                self.env.render(mode="human")
            if done:
                break

        reward = np.sum(rews)

        return reward, info
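A quick note on the flat parameter vector the wrapper above consumes (see goal_and_weights): the first num_basis * num_dof entries are the DMP basis weights and, when learn_goal=True, the last num_dof entries are the goal position. A minimal sketch with the 5-DoF, 5-basis setup used by the factory functions below; the variable names here are illustrative only.

import numpy as np

num_dof, num_basis = 5, 5
learn_goal = True

# Matches DmpEnvWrapper.dim: basis weights plus (optionally) a learned goal.
dim = num_basis * num_dof + (num_dof if learn_goal else 0)
params = np.random.randn(dim)

weight_matrix = np.reshape(params[:num_basis * num_dof], (num_basis, num_dof))
goal_pos = params[-num_dof:] if learn_goal else None
# goal_and_weights() additionally multiplies these by weights_scale and goal_scale.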
@@ -1,28 +0,0 @@
from alr_envs.utils.legacy.utils import make_holereacher_env
import numpy as np

if __name__ == "__main__":
    n_samples = 1
    n_cpus = 4
    dim = 30

    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
    #                         n_samples=n_samples)

    test_env = make_holereacher_env(0)()

    # params = np.random.randn(n_samples, dim)
    params = np.array([[1.386102, -3.29980525, 4.70402733, 1.3966668, 0.73774902,
                        3.14676681, -4.98644416, 6.20303193, 1.30502127, -0.09330522,
                        7.62656797, -5.76893033, 3.4706711, -0.6944142, -3.33442788,
                        12.31421548, -0.72760271, -6.9090723, 7.02903814, -8.7236836,
                        1.4805914, 0.53185824, -5.46626893, 0.69692163, 13.58472666,
                        0.77199316, 2.02906724, -3.0203244, -1.00533159, -0.57417351]])

    # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

    rew, info = test_env.rollout(params, render=True)
    print(rew)

    # out = env(params)
    # print(out)
@@ -1,28 +0,0 @@
from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_dmp_env
import numpy as np

if __name__ == "__main__":

    dim = 15
    n_cpus = 4

    # n_samples = 10
    #
    # vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
    #                             n_samples=n_samples)
    #
    # params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
    #
    # rewards, infos = vec_env(params)
    # print(rewards)
    #
    non_vec_env = make_simple_dmp_env(0, 0)()

    # params = 0.5 * np.random.randn(dim)
    params = np.array([-2.63357598, -1.04950296, -0.44330737, 0.52950017, 4.29247739,
                       4.52473661, -0.05685977, -0.76796851, 3.71540499, 1.22631059,
                       2.20412438, 3.91588129, -0.12652723, -3.0788211, 0.56204464])

    out2 = non_vec_env.rollout(params, render=True)

    print(out2)
@@ -1,145 +0,0 @@
import alr_envs.classic_control.hole_reacher as hr
import alr_envs.classic_control.viapoint_reacher as vpr
from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
import numpy as np


def make_viapointreacher_env(rank, seed=0):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :returns a function that generates an environment
    """

    def _init():
        _env = vpr.ViaPointReacher(n_links=5,
                                   allow_self_collision=False,
                                   collision_penalty=1000)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          alpha_phase=2.5,
                          dt=_env.dt,
                          start_pos=_env.start_pos,
                          learn_goal=False,
                          policy_type="velocity",
                          weights_scale=50)
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_env(rank, seed=0):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :returns a function that generates an environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.25,
                                 hole_depth=1,
                                 hole_x=2,
                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          bandwidth_factor=2,
                          dt=_env.dt,
                          learn_goal=True,
                          alpha_phase=2,
                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=0.1
                          )

        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_fix_goal_env(rank, seed=0):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :returns a function that generates an environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.15,
                                 hole_depth=1,
                                 hole_x=1,
                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          dt=_env.dt,
                          learn_goal=False,
                          final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]),
                          alpha_phase=2,
                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=1
                          )

        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_env_pmp(rank, seed=0):
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    :returns a function that generates an environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.15,
                                 hole_depth=1,
                                 hole_x=1,
                                 collision_penalty=1000)

        _env = DetPMPWrapper(_env, num_dof=5, num_basis=5, width=0.02, duration=2, dt=_env.dt, post_traj_time=0,
                             policy_type="velocity", weights_scale=0.2, zero_start=True, zero_goal=False)
        _env.seed(seed + rank)
        return _env

    return _init