from mp_lib.phase import ExpDecayPhaseGenerator
from mp_lib.basis import DMPBasisGenerator
from mp_lib import dmps
import numpy as np
import gym

from alr_envs.utils.wrapper.mp_wrapper import MPWrapper


class DmpWrapper(MPWrapper):

    def __init__(self, env: gym.Env, num_dof: int, num_basis: int,
                 # start_pos: np.ndarray = None,
                 # final_pos: np.ndarray = None,
                 duration: int = 1, alpha_phase: float = 2., dt: float = None,
                 learn_goal: bool = False, return_to_start: bool = False, post_traj_time: float = 0.,
                 weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
                 policy_type: str = None, render_mode: str = None):
        """
        This wrapper generates a trajectory based on a DMP and returns only episodic performance.

        Args:
            env: the environment to wrap.
            num_dof: number of degrees of freedom of the trajectory.
            num_basis: number of DMP basis functions per degree of freedom.
            duration: duration of the trajectory in seconds.
            alpha_phase: decay factor of the exponential phase generator.
            dt: time step of the trajectory; taken from the environment if it provides one.
            learn_goal: whether the goal position is learned, i.e. part of the action space.
            return_to_start: whether the trajectory should end at the start position.
            post_traj_time: time in seconds to continue simulating after the trajectory has finished.
            weights_scale: scaling factor applied to the DMP weights.
            goal_scale: scaling factor applied to the goal position.
            bandwidth_factor: bandwidth factor of the DMP basis functions.
            policy_type: type of the low-level controller that tracks the reference trajectory.
            render_mode: render mode of the wrapped environment.
        """
        self.learn_goal = learn_goal
        dt = env.dt if hasattr(env, "dt") else dt
        assert dt is not None
        # start_pos = start_pos if start_pos is not None else env.start_pos if hasattr(env, "start_pos") else None
        # TODO: assert start_pos is not None  # start_pos will be set in initialize, do we need this here?
        # if learn_goal:
        #     final_pos = np.zeros((1, num_dof))  # arbitrary, will be learned
        # else:
        #     final_pos = final_pos if final_pos is not None else start_pos if return_to_start else None
        #     assert final_pos is not None
        self.t = np.linspace(0, duration, int(duration / dt))
        self.goal_scale = goal_scale

        super().__init__(env, num_dof, duration, dt, post_traj_time, policy_type, weights_scale, render_mode,
                         num_basis=num_basis,
                         # start_pos=start_pos, final_pos=final_pos,
                         alpha_phase=alpha_phase,
                         bandwidth_factor=bandwidth_factor)
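
        # The action vector is the flattened DMP weight matrix, followed by the goal position when
        # learn_goal is set; its entries are unbounded.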
        action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
        self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)

    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5,
                      # start_pos: np.ndarray = None,
                      # final_pos: np.ndarray = None,
                      alpha_phase: float = 2., bandwidth_factor: float = 3.):
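        """Creates a DMP from an exponentially decaying phase generator and a DMP basis generator."""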
        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
        basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=num_basis,
                                            basis_bandwidth_factor=bandwidth_factor)

        dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
                       num_time_steps=int(duration / dt), dt=dt)

        # In a contextual environment the start_pos may not be fixed, so it is set in mp_rollout instead.
        # TODO: Should we set start_pos in init at all? It's only used after calling rollout anyway...
        # dmp.dmp_start_pos = start_pos.reshape((1, num_dof)) if start_pos is not None else np.zeros((1, num_dof))

        # weights = np.zeros((num_basis, num_dof))
        # goal_pos = np.zeros(num_dof) if self.learn_goal else final_pos
        # dmp.set_weights(weights, goal_pos)
        return dmp

    def goal_and_weights(self, params):
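        """Splits a flat parameter vector into the scaled goal position and the scaled DMP weight matrix."""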
        assert params.shape[-1] == self.action_space.shape[0]
        params = np.atleast_2d(params)

        if self.learn_goal:
            goal_pos = params[0, -self.mp.num_dimensions:]  # [num_dof]
            params = params[:, :-self.mp.num_dimensions]  # [1, num_basis * num_dof]
            # weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
        else:
            goal_pos = self.env.goal_pos  # self.mp.dmp_goal_pos.flatten()
            assert goal_pos is not None
            # weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])

        weight_matrix = np.reshape(params, self.mp.dmp_weights.shape)
        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale

    def mp_rollout(self, action):
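        """Sets the DMP start position from the environment, applies the action, and rolls out the reference trajectory."""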
        # if self.mp.start_pos is None:
        self.mp.dmp_start_pos = self.env.init_qpos.reshape((1, self.num_dof))  # start_pos
        goal_pos, weight_matrix = self.goal_and_weights(action)
        self.mp.set_weights(weight_matrix, goal_pos)
        return self.mp.reference_trajectory(self.t)
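

# Minimal usage sketch (commented out, since this module is meant to be imported). The environment id and
# the constructor arguments below are illustrative assumptions, not registered defaults; any gym env
# exposing dt, init_qpos and (for learn_goal=False) goal_pos should work analogously.
#
# env = gym.make("alr_envs:SomeReacherEnv-v0")  # hypothetical id
# wrapped = DmpWrapper(env, num_dof=5, num_basis=5, duration=2., learn_goal=True, policy_type="velocity")
# params = np.random.randn(wrapped.action_space.shape[0])  # flattened weights followed by the goal
# traj = wrapped.mp_rollout(params)  # reference trajectory tracked by the low-level controller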