removed legacy wrappers

This commit is contained in:
ottofabian 2021-05-17 17:59:28 +02:00
parent 14c60766c2
commit 528b7521b6
7 changed files with 0 additions and 587 deletions

View File

@@ -1,88 +0,0 @@
from alr_envs.utils.policies import get_policy_class
from mp_lib import det_promp
import numpy as np
import gym


class DetPMPEnvWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 num_dof,
                 num_basis,
                 width,
                 off=0.01,
                 start_pos=None,
                 duration=1,
                 dt=0.01,
                 post_traj_time=0.,
                 policy_type=None,
                 weights_scale=1,
                 zero_start=False,
                 zero_goal=False,
                 ):
        super(DetPMPEnvWrapper, self).__init__(env)
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.dim = num_dof * num_basis
        self.pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
                                                zero_start=zero_start, zero_goal=zero_goal)
        weights = np.zeros(shape=(num_basis, num_dof))
        self.pmp.set_weights(duration, weights)
        self.weights_scale = weights_scale

        self.duration = duration
        self.dt = dt
        self.post_traj_steps = int(post_traj_time / dt)

        self.start_pos = start_pos
        self.zero_start = zero_start

        policy_class = get_policy_class(policy_type)
        self.policy = policy_class(env)
    def __call__(self, params, contexts=None):
        params = np.atleast_2d(params)
        if contexts is None:
            # allow calling without contexts; otherwise zip(params, None) would fail
            contexts = [None] * len(params)
        rewards = []
        infos = []
        for p, c in zip(params, contexts):
            reward, info = self.rollout(p, c)
            rewards.append(reward)
            infos.append(info)

        return np.array(rewards), infos

    def rollout(self, params, context=None, render=False):
        """Generates a trajectory from the deterministic ProMP and then runs the usual reset/step loop over it."""
        params = np.reshape(params, newshape=(self.num_basis, self.num_dof)) * self.weights_scale
        self.pmp.set_weights(self.duration, params)
        t, des_pos, des_vel, des_acc = self.pmp.compute_trajectory(1 / self.dt, 1.)
        if self.zero_start:
            des_pos += self.start_pos[None, :]

        if self.post_traj_steps > 0:
            des_pos = np.vstack([des_pos, np.tile(des_pos[-1, :], [self.post_traj_steps, 1])])
            des_vel = np.vstack([des_vel, np.zeros(shape=(self.post_traj_steps, self.num_dof))])

        self._trajectory = des_pos
        self._velocity = des_vel

        rews = []
        infos = []

        self.env.configure(context)
        self.env.reset()

        for t, pos_vel in enumerate(zip(des_pos, des_vel)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
            obs, rew, done, info = self.env.step(ac)
            rews.append(rew)
            infos.append(info)
            if render:
                self.env.render(mode="human")
            if done:
                break

        reward = np.sum(rews)

        return reward, info
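
Since the batched __call__ interface of this removed wrapper is easy to misread, here is a minimal, hedged usage sketch (not part of the commit). SomeReacherEnv, the chosen dimensions, and the "velocity" policy type are illustrative assumptions; the base env is expected to provide configure(), step() and reset() as used in rollout above.

# Sketch only: evaluate a batch of ProMP weight vectors with the removed DetPMPEnvWrapper.
# SomeReacherEnv is a placeholder for any base env exposing configure(), step() and reset().
env = DetPMPEnvWrapper(SomeReacherEnv(), num_dof=5, num_basis=5, width=0.02,
                       duration=2, dt=0.02, policy_type="velocity", weights_scale=0.2)
params = np.random.randn(10, env.dim)   # one flat weight vector per rollout, dim = num_dof * num_basis = 25
rewards, infos = env(params)            # episodic reward per parameter vector; contexts default to None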

View File

@@ -1,173 +0,0 @@
import gym
from gym.error import (AlreadyPendingCallError, NoAsyncCallError)
from gym.vector.utils import concatenate, create_empty_array
from gym.vector.async_vector_env import AsyncState
import numpy as np
import multiprocessing as mp
import sys


def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
    assert shared_memory is None
    env = env_fn()
    parent_pipe.close()
    try:
        while True:
            command, data = pipe.recv()
            if command == 'reset':
                observation = env.reset()
                pipe.send((observation, True))
            elif command == 'step':
                observation, reward, done, info = env.step(data)
                if done:
                    observation = env.reset()
                pipe.send(((observation, reward, done, info), True))
            elif command == 'rollout':
                rewards = []
                infos = []
                for p, c in zip(*data):
                    reward, info = env.rollout(p, c)
                    rewards.append(reward)
                    infos.append(info)
                pipe.send(((rewards, infos), (True, ) * len(rewards)))
            elif command == 'seed':
                env.seed(data)
                pipe.send((None, True))
            elif command == 'close':
                env.close()
                pipe.send((None, True))
                break
            elif command == 'idle':
                pipe.send((None, True))
            elif command == '_check_observation_space':
                pipe.send((data == env.observation_space, True))
            else:
                raise RuntimeError('Received unknown command `{0}`. Must '
                                   'be one of {`reset`, `step`, `rollout`, `seed`, `close`, '
                                   '`idle`, `_check_observation_space`}.'.format(command))
    except (KeyboardInterrupt, Exception):
        error_queue.put((index,) + sys.exc_info()[:2])
        pipe.send((None, False))
    finally:
        env.close()


class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
    def __init__(self, env_fns, n_samples, observation_space=None, action_space=None,
                 shared_memory=False, copy=True, context="spawn", daemon=True, worker=_worker):
        super(DmpAsyncVectorEnv, self).__init__(env_fns,
                                                observation_space=observation_space,
                                                action_space=action_space,
                                                shared_memory=shared_memory,
                                                copy=copy,
                                                context=context,
                                                daemon=daemon,
                                                worker=worker)

        # we need to overwrite the number of samples as we may sample more than num_envs
        self.observations = create_empty_array(self.single_observation_space,
                                               n=n_samples,
                                               fn=np.zeros)

    def __call__(self, params, contexts=None):
        return self.rollout(params, contexts)

    def rollout_async(self, params, contexts):
        """
        Parameters
        ----------
        params : iterable of samples from `action_space`
            List of actions.
        """
        self._assert_is_running()
        if self._state != AsyncState.DEFAULT:
            raise AlreadyPendingCallError('Calling `rollout_async` while waiting '
                                          'for a pending call to `{0}` to complete.'.format(
                                              self._state.value), self._state.value)

        params = np.atleast_2d(params)
        split_params = np.array_split(params, np.minimum(len(params), self.num_envs))
        if contexts is None:
            split_contexts = np.array_split([None, ] * len(params), np.minimum(len(params), self.num_envs))
        else:
            split_contexts = np.array_split(contexts, np.minimum(len(contexts), self.num_envs))

        assert np.all([len(p) == len(c) for p, c in zip(split_params, split_contexts)])
        for pipe, param, context in zip(self.parent_pipes, split_params, split_contexts):
            pipe.send(('rollout', (param, context)))
        for pipe in self.parent_pipes[len(split_params):]:
            pipe.send(('idle', None))

        self._state = AsyncState.WAITING_ROLLOUT
    def rollout_wait(self, timeout=None):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `rollout_wait` times out. If
            `None`, the call to `rollout_wait` never times out.

        Returns
        -------
        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of episodic rewards from the vectorized environment.
        infos : list of dict
            A list of auxiliary diagnostic information.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_ROLLOUT:
            raise NoAsyncCallError('Calling `rollout_wait` without any prior call '
                                   'to `rollout_async`.', AsyncState.WAITING_ROLLOUT.value)

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError('The call to `rollout_wait` has timed out after '
                                  '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        results = [r for r in results if r is not None]
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        rewards, infos = [_flatten_list(r) for r in zip(*results)]

        # for now, we ignore the observations and only return the rewards
        # if not self.shared_memory:
        #     self.observations = concatenate(observations_list, self.observations,
        #                                     self.single_observation_space)
        # return (deepcopy(self.observations) if self.copy else self.observations,
        #         np.array(rewards), np.array(dones, dtype=np.bool_), infos)

        return np.array(rewards), infos

    def rollout(self, actions, contexts):
        self.rollout_async(actions, contexts)
        return self.rollout_wait()


def _flatten_obs(obs):
    assert isinstance(obs, (list, tuple))
    assert len(obs) > 0

    if isinstance(obs[0], dict):
        keys = obs[0].keys()
        return {k: np.stack([o[k] for o in obs]) for k in keys}
    else:
        return np.stack(obs)


def _flatten_list(l):
    assert isinstance(l, (list, tuple))
    assert len(l) > 0
    assert all([len(l_) > 0 for l_ in l])

    return [l__ for l_ in l for l__ in l_]
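
Since the async rollout protocol above is the core of this removed file, a brief hedged sketch of its intended use may help. It relies on make_holereacher_env from the removed alr_envs/utils/legacy/utils.py shown further below, and the sample counts are arbitrary.

# Sketch only: fan 16 parameter vectors out over 4 worker processes and collect episodic rewards.
env_fns = [make_holereacher_env(rank) for rank in range(4)]
vec_env = DmpAsyncVectorEnv(env_fns, n_samples=16)
params = np.random.randn(16, 30)    # 25 DMP weights + 5 learned goal positions per sample
rewards, infos = vec_env(params)    # same as vec_env.rollout(params, contexts=None)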

View File

@@ -1,125 +0,0 @@
from alr_envs.utils.policies import get_policy_class
from mp_lib.phase import ExpDecayPhaseGenerator
from mp_lib.basis import DMPBasisGenerator
from mp_lib import dmps
import numpy as np
import gym


class DmpEnvWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 num_dof,
                 num_basis,
                 start_pos=None,
                 final_pos=None,
                 duration=1,
                 dt=0.01,
                 alpha_phase=2,
                 bandwidth_factor=3,
                 learn_goal=False,
                 post_traj_time=0.,
                 policy_type=None,
                 weights_scale=1.,
                 goal_scale=1.,
                 ):
        super(DmpEnvWrapper, self).__init__(env)
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.dim = num_dof * num_basis
        if learn_goal:
            self.dim += num_dof
        self.learn_goal = learn_goal
        self.duration = duration  # seconds
        time_steps = int(duration / dt)
        self.t = np.linspace(0, duration, time_steps)
        self.post_traj_steps = int(post_traj_time / dt)

        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
        basis_generator = DMPBasisGenerator(phase_generator,
                                            duration=duration,
                                            num_basis=self.num_basis,
                                            basis_bandwidth_factor=bandwidth_factor)

        self.dmp = dmps.DMP(num_dof=num_dof,
                            basis_generator=basis_generator,
                            phase_generator=phase_generator,
                            num_time_steps=time_steps,
                            dt=dt
                            )

        self.dmp.dmp_start_pos = start_pos.reshape((1, num_dof))

        dmp_weights = np.zeros((num_basis, num_dof))
        if learn_goal:
            dmp_goal_pos = np.zeros(num_dof)
        else:
            dmp_goal_pos = final_pos

        self.dmp.set_weights(dmp_weights, dmp_goal_pos)
        self.weights_scale = weights_scale
        self.goal_scale = goal_scale

        policy_class = get_policy_class(policy_type)
        self.policy = policy_class(env)
    def __call__(self, params, contexts=None):
        params = np.atleast_2d(params)
        if contexts is None:
            # allow calling without contexts; otherwise zip(params, None) would fail
            contexts = [None] * len(params)
        rewards = []
        infos = []
        for p, c in zip(params, contexts):
            reward, info = self.rollout(p, c)
            rewards.append(reward)
            infos.append(info)

        return np.array(rewards), infos
    def goal_and_weights(self, params):
        if len(params.shape) > 1:
            assert params.shape[1] == self.dim
        else:
            assert len(params) == self.dim
            params = np.reshape(params, [1, self.dim])

        if self.learn_goal:
            goal_pos = params[0, -self.num_dof:]
            weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
        else:
            goal_pos = self.dmp.dmp_goal_pos.flatten()
            assert goal_pos is not None
            weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])

        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale

    def rollout(self, params, context=None, render=False):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
        goal_pos, weight_matrix = self.goal_and_weights(params)
        self.dmp.set_weights(weight_matrix, goal_pos)
        trajectory, velocity = self.dmp.reference_trajectory(self.t)

        if self.post_traj_steps > 0:
            trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.num_dof))])

        self._trajectory = trajectory
        self._velocity = velocity

        rews = []
        infos = []

        self.env.configure(context)
        self.env.reset()

        for t, pos_vel in enumerate(zip(trajectory, velocity)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
            obs, rew, done, info = self.env.step(ac)
            rews.append(rew)
            infos.append(info)
            if render:
                self.env.render(mode="human")
            if done:
                break

        reward = np.sum(rews)

        return reward, info
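
One detail of goal_and_weights that is easy to miss: with learn_goal=True the flat parameter vector is the row-major flattened weight matrix followed by the goal position. A small sketch of that layout (shapes only, values arbitrary):

# Sketch only: parameter layout expected by DmpEnvWrapper.goal_and_weights when learn_goal=True.
num_dof, num_basis = 5, 5
weights = np.random.randn(num_basis, num_dof)         # 25 basis weights, reshaped back inside the wrapper
goal = np.random.randn(num_dof)                       # 5 goal joint positions, appended last
params = np.concatenate([weights.flatten(), goal])    # shape (30,), i.e. wrapper.dim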

View File

@@ -1,28 +0,0 @@
from alr_envs.utils.legacy.utils import make_holereacher_env
import numpy as np

if __name__ == "__main__":
    n_samples = 1
    n_cpus = 4
    dim = 30

    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
    #                         n_samples=n_samples)

    test_env = make_holereacher_env(0)()

    # params = np.random.randn(n_samples, dim)
    params = np.array([[1.386102, -3.29980525, 4.70402733, 1.3966668, 0.73774902,
                        3.14676681, -4.98644416, 6.20303193, 1.30502127, -0.09330522,
                        7.62656797, -5.76893033, 3.4706711, -0.6944142, -3.33442788,
                        12.31421548, -0.72760271, -6.9090723, 7.02903814, -8.7236836,
                        1.4805914, 0.53185824, -5.46626893, 0.69692163, 13.58472666,
                        0.77199316, 2.02906724, -3.0203244, -1.00533159, -0.57417351]])

    # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

    rew, info = test_env.rollout(params, render=True)
    print(rew)

    # out = env(params)
    # print(out)

View File

@@ -1,28 +0,0 @@
from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_dmp_env
import numpy as np

if __name__ == "__main__":
    dim = 15
    n_cpus = 4

    # n_samples = 10
    #
    # vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
    #                             n_samples=n_samples)
    #
    # params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
    #
    # rewards, infos = vec_env(params)
    # print(rewards)
    #
    non_vec_env = make_simple_dmp_env(0, 0)()

    # params = 0.5 * np.random.randn(dim)
    params = np.array([-2.63357598, -1.04950296, -0.44330737, 0.52950017, 4.29247739,
                       4.52473661, -0.05685977, -0.76796851, 3.71540499, 1.22631059,
                       2.20412438, 3.91588129, -0.12652723, -3.0788211, 0.56204464])

    out2 = non_vec_env.rollout(params, render=True)
    print(out2)

View File

@@ -1,145 +0,0 @@
import alr_envs.classic_control.hole_reacher as hr
import alr_envs.classic_control.viapoint_reacher as vpr
from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
import numpy as np


def make_viapointreacher_env(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = vpr.ViaPointReacher(n_links=5,
                                   allow_self_collision=False,
                                   collision_penalty=1000)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          alpha_phase=2.5,
                          dt=_env.dt,
                          start_pos=_env.start_pos,
                          learn_goal=False,
                          policy_type="velocity",
                          weights_scale=50)
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_env(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.25,
                                 hole_depth=1,
                                 hole_x=2,
                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          bandwidth_factor=2,
                          dt=_env.dt,
                          learn_goal=True,
                          alpha_phase=2,
                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=0.1
                          )
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_fix_goal_env(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.15,
                                 hole_depth=1,
                                 hole_x=1,
                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          dt=_env.dt,
                          learn_goal=False,
                          final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]),
                          alpha_phase=2,
                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=1
                          )
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_env_pmp(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.15,
                                 hole_depth=1,
                                 hole_x=1,
                                 collision_penalty=1000)

        _env = DetPMPWrapper(_env, num_dof=5, num_basis=5, width=0.02, duration=2, dt=_env.dt, post_traj_time=0,
                             policy_type="velocity", weights_scale=0.2, zero_start=True, zero_goal=False)
        _env.seed(seed + rank)
        return _env

    return _init
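
For completeness, a hedged sketch (not part of the commit) of how these removed factories were consumed; the seed and rank values are arbitrary, and the call pattern mirrors the removed example script above.

# Sketch only: the factories return zero-argument constructors so that each subprocess can build
# its own env instance; calling the result immediately yields a single local env.
constructors = [make_holereacher_env(rank, seed=42) for rank in range(4)]   # nothing constructed yet
local_env = constructors[0]()                                               # built here, seeded with seed + rank
params = np.random.randn(1, 30)        # 25 DMP weights + 5 goal positions, as in the removed example script
reward, info = local_env.rollout(params, render=False)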