diff --git a/alr_envs/utils/legacy/__init__.py b/alr_envs/utils/legacy/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/alr_envs/utils/legacy/detpmp_env_wrapper.py b/alr_envs/utils/legacy/detpmp_env_wrapper.py
deleted file mode 100644
index c667abf..0000000
--- a/alr_envs/utils/legacy/detpmp_env_wrapper.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from alr_envs.utils.policies import get_policy_class
-from mp_lib import det_promp
-import numpy as np
-import gym
-
-
-class DetPMPEnvWrapper(gym.Wrapper):
-    def __init__(self,
-                 env,
-                 num_dof,
-                 num_basis,
-                 width,
-                 off=0.01,
-                 start_pos=None,
-                 duration=1,
-                 dt=0.01,
-                 post_traj_time=0.,
-                 policy_type=None,
-                 weights_scale=1,
-                 zero_start=False,
-                 zero_goal=False,
-                 ):
-        super(DetPMPEnvWrapper, self).__init__(env)
-        self.num_dof = num_dof
-        self.num_basis = num_basis
-        self.dim = num_dof * num_basis
-        self.pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
-                                                zero_start=zero_start, zero_goal=zero_goal)
-        weights = np.zeros(shape=(num_basis, num_dof))
-        self.pmp.set_weights(duration, weights)
-        self.weights_scale = weights_scale
-
-        self.duration = duration
-        self.dt = dt
-        self.post_traj_steps = int(post_traj_time / dt)
-
-        self.start_pos = start_pos
-        self.zero_start = zero_start
-
-        policy_class = get_policy_class(policy_type)
-        self.policy = policy_class(env)
-
-    def __call__(self, params, contexts=None):
-        params = np.atleast_2d(params)
-        rewards = []
-        infos = []
-        for p, c in zip(params, contexts):
-            reward, info = self.rollout(p, c)
-            rewards.append(reward)
-            infos.append(info)
-
-        return np.array(rewards), infos
-
-    def rollout(self, params, context=None, render=False):
-        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
-        params = np.reshape(params, newshape=(self.num_basis, self.num_dof)) * self.weights_scale
-        self.pmp.set_weights(self.duration, params)
-        t, des_pos, des_vel, des_acc = self.pmp.compute_trajectory(1 / self.dt, 1.)
-        if self.zero_start:
-            des_pos += self.start_pos[None, :]
-
-        if self.post_traj_steps > 0:
-            des_pos = np.vstack([des_pos, np.tile(des_pos[-1, :], [self.post_traj_steps, 1])])
-            des_vel = np.vstack([des_vel, np.zeros(shape=(self.post_traj_steps, self.num_dof))])
-
-        self._trajectory = des_pos
-        self._velocity = des_vel
-
-        rews = []
-        infos = []
-
-        self.env.configure(context)
-        self.env.reset()
-
-        for t, pos_vel in enumerate(zip(des_pos, des_vel)):
-            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
-            obs, rew, done, info = self.env.step(ac)
-            rews.append(rew)
-            infos.append(info)
-            if render:
-                self.env.render(mode="human")
-            if done:
-                break
-
-        reward = np.sum(rews)
-
-        return reward, info
-
diff --git a/alr_envs/utils/legacy/dmp_async_vec_env.py b/alr_envs/utils/legacy/dmp_async_vec_env.py
deleted file mode 100644
index 641e770..0000000
--- a/alr_envs/utils/legacy/dmp_async_vec_env.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import gym
-from gym.error import (AlreadyPendingCallError, NoAsyncCallError)
-from gym.vector.utils import concatenate, create_empty_array
-from gym.vector.async_vector_env import AsyncState
-import numpy as np
-import multiprocessing as mp
-import sys
-
-
-def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
-    assert shared_memory is None
-    env = env_fn()
-    parent_pipe.close()
-    try:
-        while True:
-            command, data = pipe.recv()
-            if command == 'reset':
-                observation = env.reset()
-                pipe.send((observation, True))
-            elif command == 'step':
-                observation, reward, done, info = env.step(data)
-                if done:
-                    observation = env.reset()
-                pipe.send(((observation, reward, done, info), True))
-            elif command == 'rollout':
-                rewards = []
-                infos = []
-                for p, c in zip(*data):
-                    reward, info = env.rollout(p, c)
-                    rewards.append(reward)
-                    infos.append(info)
-                pipe.send(((rewards, infos), (True, ) * len(rewards)))
-            elif command == 'seed':
-                env.seed(data)
-                pipe.send((None, True))
-            elif command == 'close':
-                env.close()
-                pipe.send((None, True))
-                break
-            elif command == 'idle':
-                pipe.send((None, True))
-            elif command == '_check_observation_space':
-                pipe.send((data == env.observation_space, True))
-            else:
-                raise RuntimeError('Received unknown command `{0}`. Must '
-                                   'be one of {`reset`, `step`, `seed`, `close`, '
-                                   '`_check_observation_space`}.'.format(command))
-    except (KeyboardInterrupt, Exception):
-        error_queue.put((index,) + sys.exc_info()[:2])
-        pipe.send((None, False))
-    finally:
-        env.close()
-
-
-class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
-    def __init__(self, env_fns, n_samples, observation_space=None, action_space=None,
-                 shared_memory=False, copy=True, context="spawn", daemon=True, worker=_worker):
-        super(DmpAsyncVectorEnv, self).__init__(env_fns,
-                                                observation_space=observation_space,
-                                                action_space=action_space,
-                                                shared_memory=shared_memory,
-                                                copy=copy,
-                                                context=context,
-                                                daemon=daemon,
-                                                worker=worker)
-
-        # we need to overwrite the number of samples as we may sample more than num_envs
-        self.observations = create_empty_array(self.single_observation_space,
-                                               n=n_samples,
-                                               fn=np.zeros)
-
-    def __call__(self, params, contexts=None):
-        return self.rollout(params, contexts)
-
-    def rollout_async(self, params, contexts):
-        """
-        Parameters
-        ----------
-        params : iterable of samples from `action_space`
-            List of actions.
-        """
-        self._assert_is_running()
-        if self._state != AsyncState.DEFAULT:
-            raise AlreadyPendingCallError('Calling `rollout_async` while waiting '
-                                          'for a pending call to `{0}` to complete.'.format(
-                                              self._state.value), self._state.value)
-
-        params = np.atleast_2d(params)
-        split_params = np.array_split(params, np.minimum(len(params), self.num_envs))
-        if contexts is None:
-            split_contexts = np.array_split([None, ] * len(params), np.minimum(len(params), self.num_envs))
-        else:
-            split_contexts = np.array_split(contexts, np.minimum(len(contexts), self.num_envs))
-
-        assert np.all([len(p) == len(c) for p, c in zip(split_params, split_contexts)])
-        for pipe, param, context in zip(self.parent_pipes, split_params, split_contexts):
-            pipe.send(('rollout', (param, context)))
-        for pipe in self.parent_pipes[len(split_params):]:
-            pipe.send(('idle', None))
-        self._state = AsyncState.WAITING_ROLLOUT
-
-    def rollout_wait(self, timeout=None):
-        """
-        Parameters
-        ----------
-        timeout : int or float, optional
-            Number of seconds before the call to `step_wait` times out. If
-            `None`, the call to `step_wait` never times out.
-
-        Returns
-        -------
-        observations : sample from `observation_space`
-            A batch of observations from the vectorized environment.
-
-        rewards : `np.ndarray` instance (dtype `np.float_`)
-            A vector of rewards from the vectorized environment.
-
-        dones : `np.ndarray` instance (dtype `np.bool_`)
-            A vector whose entries indicate whether the episode has ended.
-
-        infos : list of dict
-            A list of auxiliary diagnostic information.
-        """
-        self._assert_is_running()
-        if self._state != AsyncState.WAITING_ROLLOUT:
-            raise NoAsyncCallError('Calling `rollout_wait` without any prior call '
-                                   'to `rollout_async`.', AsyncState.WAITING_ROLLOUT.value)
-
-        if not self._poll(timeout):
-            self._state = AsyncState.DEFAULT
-            raise mp.TimeoutError('The call to `rollout_wait` has timed out after '
-                                  '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))
-
-        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
-        results = [r for r in results if r is not None]
-        self._raise_if_errors(successes)
-        self._state = AsyncState.DEFAULT
-
-        rewards, infos = [_flatten_list(r) for r in zip(*results)]
-
-        # for now, we ignore the observations and only return the rewards
-
-        # if not self.shared_memory:
-        #     self.observations = concatenate(observations_list, self.observations,
-        #                                     self.single_observation_space)
-
-        # return (deepcopy(self.observations) if self.copy else self.observations,
-        #         np.array(rewards), np.array(dones, dtype=np.bool_), infos)
-
-        return np.array(rewards), infos
-
-    def rollout(self, actions, contexts):
-        self.rollout_async(actions, contexts)
-        return self.rollout_wait()
-
-
-def _flatten_obs(obs):
-    assert isinstance(obs, (list, tuple))
-    assert len(obs) > 0
-
-    if isinstance(obs[0], dict):
-        keys = obs[0].keys()
-        return {k: np.stack([o[k] for o in obs]) for k in keys}
-    else:
-        return np.stack(obs)
-
-
-def _flatten_list(l):
-    assert isinstance(l, (list, tuple))
-    assert len(l) > 0
-    assert all([len(l_) > 0 for l_ in l])
-
-    return [l__ for l_ in l for l__ in l_]
diff --git a/alr_envs/utils/legacy/dmp_env_wrapper.py b/alr_envs/utils/legacy/dmp_env_wrapper.py
deleted file mode 100644
index 6835d80..0000000
--- a/alr_envs/utils/legacy/dmp_env_wrapper.py
+++ /dev/null
@@ -1,125 +0,0 @@
-from alr_envs.utils.policies import get_policy_class
-from mp_lib.phase import ExpDecayPhaseGenerator
-from mp_lib.basis import DMPBasisGenerator
-from mp_lib import dmps
-import numpy as np
-import gym
-
-
-class DmpEnvWrapper(gym.Wrapper):
-    def __init__(self,
-                 env,
-                 num_dof,
-                 num_basis,
-                 start_pos=None,
-                 final_pos=None,
-                 duration=1,
-                 dt=0.01,
-                 alpha_phase=2,
-                 bandwidth_factor=3,
-                 learn_goal=False,
-                 post_traj_time=0.,
-                 policy_type=None,
-                 weights_scale=1.,
-                 goal_scale=1.,
-                 ):
-        super(DmpEnvWrapper, self).__init__(env)
-        self.num_dof = num_dof
-        self.num_basis = num_basis
-        self.dim = num_dof * num_basis
-        if learn_goal:
-            self.dim += num_dof
-        self.learn_goal = learn_goal
-        self.duration = duration  # seconds
-        time_steps = int(duration / dt)
-        self.t = np.linspace(0, duration, time_steps)
-        self.post_traj_steps = int(post_traj_time / dt)
-
-        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
-        basis_generator = DMPBasisGenerator(phase_generator,
-                                            duration=duration,
-                                            num_basis=self.num_basis,
-                                            basis_bandwidth_factor=bandwidth_factor)
-
-        self.dmp = dmps.DMP(num_dof=num_dof,
-                            basis_generator=basis_generator,
-                            phase_generator=phase_generator,
-                            num_time_steps=time_steps,
-                            dt=dt
-                            )
-
-        self.dmp.dmp_start_pos = start_pos.reshape((1, num_dof))
-
-        dmp_weights = np.zeros((num_basis, num_dof))
-        if learn_goal:
-            dmp_goal_pos = np.zeros(num_dof)
-        else:
-            dmp_goal_pos = final_pos
-
-        self.dmp.set_weights(dmp_weights, dmp_goal_pos)
-        self.weights_scale = weights_scale
-        self.goal_scale = goal_scale
-
-        policy_class = get_policy_class(policy_type)
-        self.policy = policy_class(env)
-
-    def __call__(self, params, contexts=None):
-        params = np.atleast_2d(params)
-        rewards = []
-        infos = []
-        for p, c in zip(params, contexts):
-            reward, info = self.rollout(p, c)
-            rewards.append(reward)
-            infos.append(info)
-
-        return np.array(rewards), infos
-
-    def goal_and_weights(self, params):
-        if len(params.shape) > 1:
-            assert params.shape[1] == self.dim
-        else:
-            assert len(params) == self.dim
-            params = np.reshape(params, [1, self.dim])
-
-        if self.learn_goal:
-            goal_pos = params[0, -self.num_dof:]
-            weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
-        else:
-            goal_pos = self.dmp.dmp_goal_pos.flatten()
-            assert goal_pos is not None
-            weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])
-
-        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale
-
-    def rollout(self, params, context=None, render=False):
-        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
-        goal_pos, weight_matrix = self.goal_and_weights(params)
-        self.dmp.set_weights(weight_matrix, goal_pos)
-        trajectory, velocity = self.dmp.reference_trajectory(self.t)
-
-        if self.post_traj_steps > 0:
-            trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
-            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.num_dof))])
-
-        self._trajectory = trajectory
-        self._velocity = velocity
-
-        rews = []
-        infos = []
-
-        self.env.configure(context)
-        self.env.reset()
-
-        for t, pos_vel in enumerate(zip(trajectory, velocity)):
-            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
-            obs, rew, done, info = self.env.step(ac)
-            rews.append(rew)
-            infos.append(info)
-            if render:
-                self.env.render(mode="human")
-            if done:
-                break
-
-        reward = np.sum(rews)
-
-        return reward, info
diff --git a/alr_envs/utils/legacy/dmp_env_wrapper_example.py b/alr_envs/utils/legacy/dmp_env_wrapper_example.py
deleted file mode 100644
index d2edae5..0000000
--- a/alr_envs/utils/legacy/dmp_env_wrapper_example.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from alr_envs.utils.legacy.utils import make_holereacher_env
-import numpy as np
-
-if __name__ == "__main__":
-    n_samples = 1
-    n_cpus = 4
-    dim = 30
-
-    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
-    #                         n_samples=n_samples)
-
-    test_env = make_holereacher_env(0)()
-
-    # params = np.random.randn(n_samples, dim)
-    params = np.array([[1.386102, -3.29980525, 4.70402733, 1.3966668, 0.73774902,
-                        3.14676681, -4.98644416, 6.20303193, 1.30502127, -0.09330522,
-                        7.62656797, -5.76893033, 3.4706711, -0.6944142, -3.33442788,
-                        12.31421548, -0.72760271, -6.9090723, 7.02903814, -8.7236836,
-                        1.4805914, 0.53185824, -5.46626893, 0.69692163, 13.58472666,
-                        0.77199316, 2.02906724, -3.0203244, -1.00533159, -0.57417351]])
-
-    # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
-
-    rew, info = test_env.rollout(params, render=True)
-    print(rew)
-
-    # out = env(params)
-    # print(out)
diff --git a/alr_envs/utils/legacy/dmp_pd_control_example.py b/alr_envs/utils/legacy/dmp_pd_control_example.py
deleted file mode 100644
index 3b713f3..0000000
--- a/alr_envs/utils/legacy/dmp_pd_control_example.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_dmp_env
-import numpy as np
-
-if __name__ == "__main__":
-
-    dim = 15
-    n_cpus = 4
-
-    # n_samples = 10
-    #
-    # vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
-    #                             n_samples=n_samples)
-    #
-    # params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
-    #
-    # rewards, infos = vec_env(params)
-    # print(rewards)
-    #
-    non_vec_env = make_simple_dmp_env(0, 0)()
-
-    # params = 0.5 * np.random.randn(dim)
-    params = np.array([-2.63357598, -1.04950296, -0.44330737, 0.52950017, 4.29247739,
-                       4.52473661, -0.05685977, -0.76796851, 3.71540499, 1.22631059,
-                       2.20412438, 3.91588129, -0.12652723, -3.0788211 , 0.56204464])
-
-    out2 = non_vec_env.rollout(params, render=True )
-
-    print(out2)
diff --git a/alr_envs/utils/legacy/utils.py b/alr_envs/utils/legacy/utils.py
deleted file mode 100644
index e96eee6..0000000
--- a/alr_envs/utils/legacy/utils.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import alr_envs.classic_control.hole_reacher as hr
-import alr_envs.classic_control.viapoint_reacher as vpr
-from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
-from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
-import numpy as np
-
-
-def make_viapointreacher_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        _env = vpr.ViaPointReacher(n_links=5,
-                                   allow_self_collision=False,
-                                   collision_penalty=1000)
-
-        _env = DmpWrapper(_env,
-                          num_dof=5,
-                          num_basis=5,
-                          duration=2,
-                          alpha_phase=2.5,
-                          dt=_env.dt,
-                          start_pos=_env.start_pos,
-                          learn_goal=False,
-                          policy_type="velocity",
-                          weights_scale=50)
-        _env.seed(seed + rank)
-        return _env
-
-    return _init
-
-
-def make_holereacher_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        _env = hr.HoleReacherEnv(n_links=5,
-                                 allow_self_collision=False,
-                                 allow_wall_collision=False,
-                                 hole_width=0.25,
-                                 hole_depth=1,
-                                 hole_x=2,
-                                 collision_penalty=100)
-
-        _env = DmpWrapper(_env,
-                          num_dof=5,
-                          num_basis=5,
-                          duration=2,
-                          bandwidth_factor=2,
-                          dt=_env.dt,
-                          learn_goal=True,
-                          alpha_phase=2,
-                          start_pos=_env._start_pos,
-                          policy_type="velocity",
-                          weights_scale=50,
-                          goal_scale=0.1
-                          )
-
-        _env.seed(seed + rank)
-        return _env
-
-    return _init
-
-
-def make_holereacher_fix_goal_env(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        _env = hr.HoleReacherEnv(n_links=5,
-                                 allow_self_collision=False,
-                                 allow_wall_collision=False,
-                                 hole_width=0.15,
-                                 hole_depth=1,
-                                 hole_x=1,
-                                 collision_penalty=100)
-
-        _env = DmpWrapper(_env,
-                          num_dof=5,
-                          num_basis=5,
-                          duration=2,
-                          dt=_env.dt,
-                          learn_goal=False,
-                          final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]),
-                          alpha_phase=2,
-                          start_pos=_env._start_pos,
-                          policy_type="velocity",
-                          weights_scale=50,
-                          goal_scale=1
-                          )
-
-        _env.seed(seed + rank)
-        return _env
-
-    return _init
-
-
-def make_holereacher_env_pmp(rank, seed=0):
-    """
-    Utility function for multiprocessed env.
-
-    :param env_id: (str) the environment ID
-    :param num_env: (int) the number of environments you wish to have in subprocesses
-    :param seed: (int) the initial seed for RNG
-    :param rank: (int) index of the subprocess
-    :returns a function that generates an environment
-    """
-
-    def _init():
-        _env = hr.HoleReacherEnv(n_links=5,
-                                 allow_self_collision=False,
-                                 allow_wall_collision=False,
-                                 hole_width=0.15,
-                                 hole_depth=1,
-                                 hole_x=1,
-                                 collision_penalty=1000)
-
-        _env = DetPMPWrapper(_env, num_dof=5, num_basis=5, width=0.02, duration=2, dt=_env.dt, post_traj_time=0,
-                             policy_type="velocity", weights_scale=0.2, zero_start=True, zero_goal=False)
-        _env.seed(seed + rank)
-        return _env
-
-    return _init