from mp_lib.phase import ExpDecayPhaseGenerator
from mp_lib.basis import DMPBasisGenerator
from mp_lib import dmps
import numpy as np
import gym

from alr_envs.utils.wrapper.mp_wrapper import MPWrapper


class DmpWrapper(MPWrapper):

    def __init__(self, env: gym.Env, num_dof: int, num_basis: int,
                 # start_pos: np.ndarray = None,
                 # final_pos: np.ndarray = None,
                 duration: int = 1, alpha_phase: float = 2., dt: float = None,
                 learn_goal: bool = False, return_to_start: bool = False, post_traj_time: float = 0.,
                 weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
                 policy_type: str = None, render_mode: str = None):
        """
        This wrapper generates a trajectory based on a DMP and returns only episodic performance.

        Args:
            env: the environment to wrap.
            num_dof: number of degrees of freedom of the trajectory.
            num_basis: number of DMP basis functions per degree of freedom.
            duration: duration of the trajectory in seconds.
            alpha_phase: decay factor of the exponential phase generator.
            dt: time step of the trajectory; taken from the environment if it provides one.
            learn_goal: whether the goal position is learned, i.e. part of the action space.
            return_to_start: whether the trajectory should end at the start position.
            post_traj_time: time in seconds to continue simulating after the trajectory has finished.
            weights_scale: scaling factor applied to the DMP weights.
            goal_scale: scaling factor applied to the goal position.
            bandwidth_factor: bandwidth factor of the DMP basis functions.
            policy_type: type of the low-level controller that tracks the reference trajectory.
            render_mode: render mode of the wrapped environment.
        """
        self.learn_goal = learn_goal
        dt = env.dt if hasattr(env, "dt") else dt
        assert dt is not None
        # start_pos = start_pos if start_pos is not None else env.start_pos if hasattr(env, "start_pos") else None
        # TODO: assert start_pos is not None  # start_pos will be set in initialize, do we need this here?
        # if learn_goal:
        #     final_pos = np.zeros((1, num_dof))  # arbitrary, will be learned
        # else:
        #     final_pos = final_pos if final_pos is not None else start_pos if return_to_start else None
        #     assert final_pos is not None
        self.t = np.linspace(0, duration, int(duration / dt))
        self.goal_scale = goal_scale

        super().__init__(env, num_dof, duration, dt, post_traj_time, policy_type, weights_scale, render_mode,
                         num_basis=num_basis,
                         # start_pos=start_pos, final_pos=final_pos,
                         alpha_phase=alpha_phase,
                         bandwidth_factor=bandwidth_factor)
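
        # The action vector is the flattened DMP weight matrix, followed by the goal position when
        # learn_goal is set; its entries are unbounded.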
        action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
        self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)

    def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5,
                      # start_pos: np.ndarray = None,
                      # final_pos: np.ndarray = None,
                      alpha_phase: float = 2., bandwidth_factor: float = 3.):
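        """Creates a DMP from an exponentially decaying phase generator and a DMP basis generator."""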
        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
        basis_generator = DMPBasisGenerator(phase_generator, duration=duration, num_basis=num_basis,
                                            basis_bandwidth_factor=bandwidth_factor)

        dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
                       num_time_steps=int(duration / dt), dt=dt)

        # In a contextual environment the start_pos may not be fixed, so it is set in mp_rollout instead.
        # TODO: Should we set start_pos in init at all? It's only used after calling rollout anyway...
        # dmp.dmp_start_pos = start_pos.reshape((1, num_dof)) if start_pos is not None else np.zeros((1, num_dof))

        # weights = np.zeros((num_basis, num_dof))
        # goal_pos = np.zeros(num_dof) if self.learn_goal else final_pos
        # dmp.set_weights(weights, goal_pos)
        return dmp

    def goal_and_weights(self, params):
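        """Splits a flat parameter vector into the scaled goal position and the scaled DMP weight matrix."""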
        assert params.shape[-1] == self.action_space.shape[0]
        params = np.atleast_2d(params)

        if self.learn_goal:
            goal_pos = params[0, -self.mp.num_dimensions:]  # [num_dof]
            params = params[:, :-self.mp.num_dimensions]  # [1, num_basis * num_dof]
            # weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
        else:
            goal_pos = self.env.goal_pos  # self.mp.dmp_goal_pos.flatten()
            assert goal_pos is not None
            # weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])

        weight_matrix = np.reshape(params, self.mp.dmp_weights.shape)
        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale

    def mp_rollout(self, action):
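        """Sets the DMP start position from the environment, applies the action, and rolls out the reference trajectory."""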
        # if self.mp.start_pos is None:
        self.mp.dmp_start_pos = self.env.init_qpos.reshape((1, self.num_dof))  # start_pos
        goal_pos, weight_matrix = self.goal_and_weights(action)
        self.mp.set_weights(weight_matrix, goal_pos)
        return self.mp.reference_trajectory(self.t)
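

# Minimal usage sketch (commented out, since this module is meant to be imported). The environment id and
# the constructor arguments below are illustrative assumptions, not registered defaults; any gym env
# exposing dt, init_qpos and (for learn_goal=False) goal_pos should work analogously.
#
# env = gym.make("alr_envs:SomeReacherEnv-v0")  # hypothetical id
# wrapped = DmpWrapper(env, num_dof=5, num_basis=5, duration=2., learn_goal=True, policy_type="velocity")
# params = np.random.randn(wrapped.action_space.shape[0])  # flattened weights followed by the goal
# traj = wrapped.mp_rollout(params)  # reference trajectory tracked by the low-level controller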