working bp version

parent f33996c27a
commit 2fbde9fbb1
@@ -391,7 +391,8 @@ register(
     max_episode_steps=600,
     kwargs={
         "rndm_goal": False,
-        "cup_goal_pos": [-0.3, -1.2]
+        "cup_goal_pos": [0.1, -2.0],
+        "learn_release_step": True
     }
 )
 
@@ -403,7 +404,8 @@ register(
     max_episode_steps=600,
     kwargs={
         "rndm_goal": True,
-        "cup_goal_pos": [-0.3, -1.2]
+        "cup_goal_pos": [-0.3, -1.2],
+        "learn_release_step": True
     }
 )
 
@@ -546,34 +548,72 @@ for _v in _versions:
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
+# ## Beerpong
+# _versions = ["v0", "v1"]
+# for _v in _versions:
+#     _env_id = f'BeerpongProMP-{_v}'
+#     register(
+#         id=_env_id,
+#         entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+#         kwargs={
+#             "name": f"alr_envs:ALRBeerPong-{_v}",
+#             "wrappers": [mujoco.beerpong.MPWrapper],
+#             "mp_kwargs": {
+#                 "num_dof": 7,
+#                 "num_basis": 2,
+#                 # "duration": 1,
+#                 "duration": 0.5,
+#                 # "post_traj_time": 2,
+#                 "post_traj_time": 2.5,
+#                 "policy_type": "motor",
+#                 "weights_scale": 0.14,
+#                 # "weights_scale": 1,
+#                 "zero_start": True,
+#                 "zero_goal": False,
+#                 "policy_kwargs": {
+#                     "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
+#                     "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
+#                 }
+#             }
+#         }
+#     )
+#     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
+
 ## Beerpong
 _versions = ["v0", "v1"]
 for _v in _versions:
     _env_id = f'BeerpongProMP-{_v}'
     register(
         id=_env_id,
-        entry_point='alr_envs.utils.make_env_helpers:make_promp_env_helper',
+        entry_point='alr_envs.utils.make_env_helpers:make_mp_env_helper',
         kwargs={
             "name": f"alr_envs:ALRBeerPong-{_v}",
-            "wrappers": [mujoco.beerpong.MPWrapper],
-            "mp_kwargs": {
-                "num_dof": 7,
-                "num_basis": 2,
-                # "duration": 1,
-                "duration": 0.5,
-                # "post_traj_time": 2,
-                "post_traj_time": 2.5,
-                "policy_type": "motor",
-                "weights_scale": 0.14,
-                # "weights_scale": 1,
-                "zero_start": True,
-                "zero_goal": False,
-                "policy_kwargs": {
-                    "p_gains": np.array([ 1.5, 5, 2.55, 3, 2., 2, 1.25]),
-                    "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125])
-                }
-            }
+            "wrappers": [mujoco.beerpong.NewMPWrapper],
+            "ep_wrapper_kwargs": {
+                "weight_scale": 1
+            },
+            "movement_primitives_kwargs": {
+                'movement_primitives_type': 'promp',
+                'num_dof': 7
+            },
+            "phase_generator_kwargs": {
+                'phase_generator_type': 'linear',
+                'delay': 0,
+                'tau': 0.8,  # initial value
+                'learn_tau': True,
+                'learn_delay': False
+            },
+            "controller_kwargs": {
+                'controller_type': 'motor',
+                "p_gains": np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
+                "d_gains": np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
+            },
+            "basis_generator_kwargs": {
+                'basis_generator_type': 'zero_rbf',
+                'num_basis': 2,
+                'num_basis_zero_start': 2
+            }
         }
     )
     ALL_ALR_MOTION_PRIMITIVE_ENVIRONMENTS["ProMP"].append(_env_id)
 
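A rough usage sketch of the new registration (assuming, as is usual for this package, that importing alr_envs runs the register() calls above so the id resolves via gym):

import gym
import alr_envs  # assumption: importing the package registers the environments

env = gym.make("BeerpongProMP-v0")
obs = env.reset()
# One episodic "step" consumes a full parameter vector: the ProMP weights plus tau
# (learn_tau=True) and, with learn_release_step=True, the ball release step.
action = env.action_space.sample()
obs, episode_return, done, info = env.step(action)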
@@ -1 +1,2 @@
 from .mp_wrapper import MPWrapper
+from .new_mp_wrapper import NewMPWrapper
@@ -3,6 +3,7 @@ import os
 
 import numpy as np
 from gym import utils
+from gym import spaces
 from gym.envs.mujoco import MujocoEnv
 from alr_envs.alr.mujoco.beerpong.beerpong_reward_staged import BeerPongReward
 
@@ -17,7 +18,7 @@ CUP_POS_MAX = np.array([0.32, -1.2])
 
 class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def __init__(self, frame_skip=1, apply_gravity_comp=True, noisy=False,
-                 rndm_goal=False, cup_goal_pos=None):
+                 rndm_goal=False, learn_release_step=True, cup_goal_pos=None):
         cup_goal_pos = np.array(cup_goal_pos if cup_goal_pos is not None else [-0.3, -1.2, 0.840])
         if cup_goal_pos.shape[0]==2:
             cup_goal_pos = np.insert(cup_goal_pos, 2, 0.840)
@@ -51,10 +52,9 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
             self.noise_std = 0.01
         else:
             self.noise_std = 0
+        self.learn_release_step = learn_release_step
         reward_function = BeerPongReward
         self.reward_function = reward_function()
-        self.n_table_bounces_first = 0
 
         MujocoEnv.__init__(self, self.xml_path, frame_skip)
         utils.EzPickle.__init__(self)
@@ -63,6 +63,13 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
     def start_pos(self):
         return self._start_pos
 
+    def _set_action_space(self):
+        bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
+        bounds = np.concatenate((bounds, [[50, self.ep_length*0.333]]), axis=0)
+        low, high = bounds.T
+        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
+        return self.action_space
+
     @property
     def start_vel(self):
         return self._start_vel
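A minimal sketch of what the overridden _set_action_space does to the Box bounds; the actuator ranges here are placeholders, and ep_length = 600 is an assumption taken from the max_episode_steps used in the registrations above:

import numpy as np
from gym import spaces

actuator_bounds = np.tile([[-1.0, 1.0]], (7, 1)).astype(np.float32)  # placeholder ctrl ranges, 7 joints
ep_length = 600                                                      # assumed episode length
bounds = np.concatenate((actuator_bounds, [[50, ep_length * 0.333]]), axis=0)  # extra row bounds the release step
low, high = bounds.T
action_space = spaces.Box(low=low, high=high, dtype=np.float32)
print(action_space.shape)  # (8,) -> 7 torque dimensions + 1 release-step dimension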
@@ -76,8 +83,6 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return self.sim.data.qvel[0:7].copy()
 
     def reset(self):
-        print(not self.reward_function.ball_ground_contact_first)
-        self.n_table_bounces_first += int(not self.reward_function.ball_ground_contact_first)
         self.reward_function.reset(self.add_noise)
         return super().reset()
 
@@ -104,14 +109,17 @@ class ALRBeerBongEnv(MujocoEnv, utils.EzPickle):
         return self._get_obs()
 
     def step(self, a):
+        self._release_step = a[-1] if self.learn_release_step else self._release_step
+        self._release_step = np.clip(self._release_step, self.action_space.low[-1], self.action_space.high[-1]) \
+            if self.learn_release_step else self._release_step
         reward_dist = 0.0
         angular_vel = 0.0
-        reward_ctrl = - np.square(a).sum()
+        applied_action = a[:a.shape[0]-int(self.learn_release_step)]
+        reward_ctrl = - np.square(applied_action).sum()
         if self.apply_gravity_comp:
-            a = a + self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
+            applied_action += self.sim.data.qfrc_bias[:len(applied_action)].copy() / self.model.actuator_gear[:, 0]
         try:
-            self.do_simulation(a, self.frame_skip)
+            self.do_simulation(applied_action, self.frame_skip)
             if self._steps < self._release_step:
                 self.sim.data.qpos[7::] = self.sim.data.site_xpos[self.ball_site_id, :].copy()
                 self.sim.data.qvel[7::] = self.sim.data.site_xvelp[self.ball_site_id, :].copy()
|
|||||||
ob = self._get_obs()
|
ob = self._get_obs()
|
||||||
|
|
||||||
if not crash:
|
if not crash:
|
||||||
reward, reward_infos = self.reward_function.compute_reward(self, a)
|
reward, reward_infos = self.reward_function.compute_reward(self, applied_action)
|
||||||
success = reward_infos['success']
|
success = reward_infos['success']
|
||||||
is_collided = reward_infos['is_collided']
|
is_collided = reward_infos['is_collided']
|
||||||
ball_pos = reward_infos['ball_pos']
|
ball_pos = reward_infos['ball_pos']
|
||||||
|
@@ -162,6 +162,10 @@ class BeerPongReward:
             min_dist_coeff, final_dist_coeff, rew_offset = 0, 1, 0
             reward = rew_offset - min_dist_coeff * min_dist ** 2 - final_dist_coeff * final_dist ** 2 - \
                      1e-4 * np.mean(action_cost)
+            if env.learn_release_step and not self.ball_in_cup:
+                too_small = (env._release_step<50)*(env._release_step-50)**2
+                too_big = (env._release_step>200)*0.2*(env._release_step-200)**2
+                reward = reward - too_small - too_big
             # 1e-7*np.mean(action_cost)
             success = self.ball_in_cup
         else:
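A worked sketch of the release-step shaping term added above, with illustrative values only (the 50/200 window and the 0.2 factor come from the diff):

def release_penalty(release_step):
    # Penalize releasing too early (< 50 steps) or too late (> 200 steps).
    too_small = (release_step < 50) * (release_step - 50) ** 2
    too_big = (release_step > 200) * 0.2 * (release_step - 200) ** 2
    return too_small + too_big

print(release_penalty(30))   # -> 400  (released too early)
print(release_penalty(120))  # -> 0    (inside the preferred window)
print(release_penalty(250))  # -> 500.0 (released too late)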
@@ -1,13 +1,15 @@
-from mp_wrapper import BaseMPWrapper
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
 from typing import Union, Tuple
 import numpy as np
+import gym
 
 
-class MPWrapper(BaseMPWrapper):
+class NewMPWrapper(EpisodicWrapper):
+    @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
         return self.env.sim.data.qpos[0:7].copy()
 
+    @property
     def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
         return self.env.sim.data.qvel[0:7].copy()
 
@@ -18,3 +20,25 @@ class MPWrapper(BaseMPWrapper):
             [True] * 2, # xy position of cup
             [False] # env steps
         ])
+
+    def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+        if self.env.learn_release_step:
+            return np.concatenate((step_action, np.atleast_1d(env_spec_params)))
+        else:
+            return step_action
+
+    def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+        if self.env.learn_release_step:
+            return action[:-1], action[-1]  # mp_params, release step
+        else:
+            return action, None
+
+    def set_action_space(self):
+        if self.env.learn_release_step:
+            min_action_bounds, max_action_bounds = self.mp.get_param_bounds()
+            min_action_bounds = np.concatenate((min_action_bounds.numpy(), [self.env.action_space.low[-1]]))
+            max_action_bounds = np.concatenate((max_action_bounds.numpy(), [self.env.action_space.high[-1]]))
+            self.mp_action_space = gym.spaces.Box(low=min_action_bounds, high=max_action_bounds, dtype=np.float32)
+            return self.mp_action_space
+        else:
+            return super(NewMPWrapper, self).set_action_space()
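To illustrate how the new wrapper splits the flat action when learn_release_step is True (all values below are made up):

import numpy as np

action = np.array([0.8, 0.3, -0.1, 0.7, 1.2, 0.0, -0.5, 0.9, 150.0])  # hypothetical: [tau, promp weights..., release step]
mp_params, release_step = action[:-1], action[-1]      # what _episode_callback returns
# _step_callback then appends the release step to every low-level step action:
step_action = np.zeros(7)                               # hypothetical 7-dof torque command
step_action = np.concatenate((step_action, np.atleast_1d(release_step)))
print(step_action.shape)                                # (8,)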
@@ -1,9 +1,9 @@
-from mp_wrapper import BaseMPWrapper
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
 from typing import Union, Tuple
 import numpy as np
 
 
-class MPWrapper(BaseMPWrapper):
+class MPWrapper(EpisodicWrapper):
 
     @property
     def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
@@ -39,7 +39,7 @@ actual_vel = np.zeros((len(pos), *base_shape))
 act = np.zeros((len(pos), *base_shape))
 
 for t, pos_vel in enumerate(zip(pos, vel)):
-    actions = env.policy.get_action(pos_vel[0], pos_vel[1])
+    actions = env.policy.get_action(pos_vel[0], pos_vel[1], self.current_vel, self.current_pos)
     actions = np.clip(actions, env.full_action_space.low, env.full_action_space.high)
     _, _, _, _ = env.env.step(actions)
    act[t, :] = actions
alr_envs/mp/__init__.py (new file, 0 lines)

alr_envs/mp/basis_generator_factory.py (new file, 17 lines)
@@ -0,0 +1,17 @@
+from mp_pytorch import PhaseGenerator, NormalizedRBFBasisGenerator, ZeroStartNormalizedRBFBasisGenerator
+from mp_pytorch.basis_gn.rhytmic_basis import RhythmicBasisGenerator
+
+ALL_TYPES = ["rbf", "zero_rbf", "rhythmic"]
+
+
+def get_basis_generator(basis_generator_type: str, phase_generator: PhaseGenerator, **kwargs):
+    basis_generator_type = basis_generator_type.lower()
+    if basis_generator_type == "rbf":
+        return NormalizedRBFBasisGenerator(phase_generator, **kwargs)
+    elif basis_generator_type == "zero_rbf":
+        return ZeroStartNormalizedRBFBasisGenerator(phase_generator, **kwargs)
+    elif basis_generator_type == "rhythmic":
+        return RhythmicBasisGenerator(phase_generator, **kwargs)
+    else:
+        raise ValueError(f"Specified basis generator type {basis_generator_type} not supported, "
+                         f"please choose one of {ALL_TYPES}.")
alr_envs/mp/controllers/__init__.py (new file, 0 lines)

alr_envs/mp/controllers/base_controller.py (new file, 4 lines)
@@ -0,0 +1,4 @@
+class BaseController:
+
+    def get_action(self, des_pos, des_vel, c_pos, c_vel):
+        raise NotImplementedError
alr_envs/mp/controllers/controller_factory.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+from alr_envs.mp.controllers.meta_world_controller import MetaWorldController
+from alr_envs.mp.controllers.pd_controller import PDController
+from alr_envs.mp.controllers.vel_controller import VelController
+from alr_envs.mp.controllers.pos_controller import PosController
+
+ALL_TYPES = ["motor", "velocity", "position", "metaworld"]
+
+
+def get_controller(controller_type: str, **kwargs):
+    controller_type = controller_type.lower()
+    if controller_type == "motor":
+        return PDController(**kwargs)
+    elif controller_type == "velocity":
+        return VelController()
+    elif controller_type == "position":
+        return PosController()
+    elif controller_type == "metaworld":
+        return MetaWorldController()
+    else:
+        raise ValueError(f"Specified controller type {controller_type} not supported, "
+                         f"please choose one of {ALL_TYPES}.")
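A hedged usage sketch of the factory; the "motor" branch returns the PDController defined further down, and the gain values here are arbitrary:

import numpy as np
from alr_envs.mp.controllers.controller_factory import get_controller

controller = get_controller("motor", p_gains=1.0, d_gains=0.1)   # illustrative gains
des_pos, des_vel = np.zeros(7), np.zeros(7)
c_pos, c_vel = np.full(7, 0.1), np.zeros(7)
torque = controller.get_action(des_pos, des_vel, c_pos, c_vel)
# PD law: p_gains * (des_pos - c_pos) + d_gains * (des_vel - c_vel) -> array of -0.1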
alr_envs/mp/controllers/meta_world_controller.py (new file, 25 lines)
@@ -0,0 +1,25 @@
+import numpy as np
+
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class MetaWorldController(BaseController):
+    """
+    A Metaworld Controller. Using position and velocity information from a provided environment,
+    the controller calculates a response based on the desired position and velocity.
+    Unlike the other Controllers, this is a special controller for MetaWorld environments.
+    They use a position delta for the xyz coordinates and a raw position for the gripper opening.
+
+    :param env: A position environment
+    """
+
+    def get_action(self, des_pos, des_vel, c_pos, c_vel):
+        gripper_pos = des_pos[-1]
+
+        cur_pos = env.current_pos[:-1]
+        xyz_pos = des_pos[:-1]
+
+        assert xyz_pos.shape == cur_pos.shape, \
+            f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
+        trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
+        return trq
alr_envs/mp/controllers/pd_controller.py (new file, 28 lines)
@@ -0,0 +1,28 @@
+from typing import Union, Tuple
+
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class PDController(BaseController):
+    """
+    A PD-Controller. Using position and velocity information from a provided environment,
+    the controller calculates a response based on the desired position and velocity
+
+    :param env: A position environment
+    :param p_gains: Factors for the proportional gains
+    :param d_gains: Factors for the differential gains
+    """
+
+    def __init__(self,
+                 p_gains: Union[float, Tuple] = 1,
+                 d_gains: Union[float, Tuple] = 0.5):
+        self.p_gains = p_gains
+        self.d_gains = d_gains
+
+    def get_action(self, des_pos, des_vel, c_pos, c_vel):
+        assert des_pos.shape == c_pos.shape, \
+            f"Mismatch in dimension between desired position {des_pos.shape} and current position {c_pos.shape}"
+        assert des_vel.shape == c_vel.shape, \
+            f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {c_vel.shape}"
+        trq = self.p_gains * (des_pos - c_pos) + self.d_gains * (des_vel - c_vel)
+        return trq
alr_envs/mp/controllers/pos_controller.py (new file, 9 lines)
@@ -0,0 +1,9 @@
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class PosController(BaseController):
+    """
+    A Position Controller. The controller calculates a response only based on the desired position.
+    """
+    def get_action(self, des_pos, des_vel, c_pos, c_vel):
+        return des_pos
alr_envs/mp/controllers/vel_controller.py (new file, 9 lines)
@@ -0,0 +1,9 @@
+from alr_envs.mp.controllers.base_controller import BaseController
+
+
+class VelController(BaseController):
+    """
+    A Velocity Controller. The controller calculates a response only based on the desired velocity.
+    """
+    def get_action(self, des_pos, des_vel, c_pos, c_vel):
+        return des_vel
@@ -3,15 +3,13 @@ from typing import Union, Tuple
 
 import gym
 import numpy as np
 
 from gym import spaces
-from gym.envs.mujoco import MujocoEnv
 
-from policies import get_policy_class, BaseController
 from mp_pytorch.mp.mp_interfaces import MPInterface
 
+from alr_envs.mp.controllers.base_controller import BaseController
 
-class BaseMPWrapper(gym.Env, ABC):
+
+class EpisodicWrapper(gym.Env, ABC):
     """
     Base class for movement primitive based gym.Wrapper implementations.
 
@@ -20,41 +18,34 @@ class BaseMPWrapper(gym.Env, ABC):
         num_dof: Dimension of the action space of the wrapped env
         num_basis: Number of basis functions per dof
         duration: Length of the trajectory of the movement primitive in seconds
-        post_traj_time: Time for which the last position of the trajectory is fed to the environment to continue
-                        simulation in seconds
-        policy_type: Type or object defining the policy that is used to generate action based on the trajectory
+        controller: Type or object defining the policy that is used to generate action based on the trajectory
         weight_scale: Scaling parameter for the actions given to this wrapper
         render_mode: Equivalent to gym render mode
     """
 
     def __init__(self,
-                 env: MujocoEnv,
+                 env: gym.Env,
                  mp: MPInterface,
+                 controller: BaseController,
                  duration: float,
-                 policy_type: Union[str, BaseController] = None,
                  render_mode: str = None,
-                 verbose=2,
-                 **mp_kwargs
-                 ):
+                 verbose: int = 1,
+                 weight_scale: float = 1):
         super().__init__()
 
-        assert env.dt is not None
         self.env = env
-        self.dt = env.dt
+        try:
+            self.dt = env.dt
+        except AttributeError:
+            raise AttributeError("step based environment needs to have a function 'dt' ")
         self.duration = duration
         self.traj_steps = int(duration / self.dt)
        self.post_traj_steps = self.env.spec.max_episode_steps - self.traj_steps
 
-        # TODO: move to constructor, use policy factory instead what Fabian already coded
-        if isinstance(policy_type, str):
-            # pop policy kwargs here such that they are not passed to the initialize_mp method
-            self.policy = get_policy_class(policy_type, self, **mp_kwargs.pop('policy_kwargs', {}))
-        else:
-            self.policy = policy_type
+        self.controller = controller
 
         self.mp = mp
         self.env = env
         self.verbose = verbose
+        self.weight_scale = weight_scale
 
         # rendering
         self.render_mode = render_mode
@@ -62,19 +53,24 @@ class BaseMPWrapper(gym.Env, ABC):
         # self.time_steps = np.linspace(0, self.duration, self.traj_steps + 1)
         self.time_steps = np.linspace(0, self.duration, self.traj_steps)
         self.mp.set_mp_times(self.time_steps)
 
         # action_bounds = np.inf * np.ones((np.prod(self.mp.num_params)))
         min_action_bounds, max_action_bounds = mp.get_param_bounds()
-        self.action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
+        self.mp_action_space = gym.spaces.Box(low=min_action_bounds.numpy(), high=max_action_bounds.numpy(),
                                            dtype=np.float32)
 
+        self.action_space = self.set_action_space()
         self.active_obs = self.set_active_obs()
         self.observation_space = spaces.Box(low=self.env.observation_space.low[self.active_obs],
                                             high=self.env.observation_space.high[self.active_obs],
                                             dtype=self.env.observation_space.dtype)
 
     def get_trajectory(self, action: np.ndarray) -> Tuple:
-        self.mp.set_params(action)
+        # TODO: this follows the implementation of the mp_pytorch library which includes the parameters tau and delay
+        # at the beginning of the array.
+        ignore_indices = int(self.mp.learn_tau) + int(self.mp.learn_delay)
+        scaled_mp_params = action.copy()
+        scaled_mp_params[ignore_indices:] *= self.weight_scale
+        self.mp.set_params(scaled_mp_params)
         self.mp.set_boundary_conditions(bc_time=self.time_steps[:1], bc_pos=self.current_pos, bc_vel=self.current_vel)
         traj_dict = self.mp.get_mp_trajs(get_pos = True, get_vel = True)
         trajectory_tensor, velocity_tensor = traj_dict['pos'], traj_dict['vel']
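A small sketch of the scaling introduced in get_trajectory: with learn_tau=True and learn_delay=False (as in the beer pong registration above), the first entry of the flat parameter vector is tau and is excluded from weight scaling. The numbers are made up:

import numpy as np

action = np.array([0.8, 0.5, -0.2, 0.1])      # hypothetical [tau, w1, w2, w3]
weight_scale = 2.0
ignore_indices = 1                            # int(learn_tau) + int(learn_delay)
scaled_mp_params = action.copy()
scaled_mp_params[ignore_indices:] *= weight_scale
print(scaled_mp_params)                       # [ 0.8  1.  -0.4  0.2]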
@@ -88,9 +84,54 @@ class BaseMPWrapper(gym.Env, ABC):
 
         return trajectory, velocity
 
+    def set_action_space(self):
+        """
+        This function can be used to modify the action space for considering actions which are not learned via motion
+        primitives. E.g. ball releasing time for the beer pong task. By default, it is the parameter space of the
+        motion primitive.
+        Only needs to be overwritten if the action space needs to be modified.
+        """
+        return self.mp_action_space
+
+    def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
+        """
+        Used to extract the parameters for the motion primitive and other parameters from an action array which might
+        include other actions like ball releasing time for the beer pong environment.
+        This only needs to be overwritten if the action space is modified.
+        Args:
+            action: a vector instance of the whole action space, includes mp parameters and additional parameters if
+                    specified, else only mp parameters
+
+        Returns:
+            Tuple: mp_arguments and other arguments
+        """
+        return action, None
+
+    def _step_callback(self, t: int, env_spec_params: Union[np.ndarray, None], step_action: np.ndarray) -> Union[np.ndarray]:
+        """
+        This function can be used to modify the step_action with additional parameters e.g. releasing the ball in the
+        Beerpong env. The parameters used should not be part of the motion primitive parameters.
+        Returns step_action by default, can be overwritten in individual mp_wrappers.
+        Args:
+            t: the current time step of the episode
+            env_spec_params: the environment specific parameter, as defined in function _episode_callback
+                             (e.g. ball release time in Beer Pong)
+            step_action: the current step-based action
+
+        Returns:
+            modified step action
+        """
+        return step_action
+
     @abstractmethod
-    def set_active_obs(self):
-        pass
+    def set_active_obs(self) -> np.ndarray:
+        """
+        This function defines the contexts. The contexts are defined as specific observations.
+        Returns:
+            boolean array representing the indices of the observations
+
+        """
+        return np.ones(self.env.observation_space.shape[0], dtype=bool)
 
     @property
     @abstractmethod
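For intuition on set_active_obs, a minimal sketch of how such a boolean mask selects the context part of an observation, loosely mirroring the beer pong wrapper above (which keeps the cup xy position and drops the env step counter); the observation values are invented:

import numpy as np

active_obs = np.hstack([[True] * 2, [False]])
observation = np.array([0.1, -2.0, 347.0])    # hypothetical [cup_x, cup_y, env_step]
context = observation[active_obs]             # -> array([ 0.1, -2. ])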
@@ -116,38 +157,34 @@ class BaseMPWrapper(gym.Env, ABC):
         """
         raise NotImplementedError()
 
-    def _step_callback(self, t, action):
-        pass
-
     def step(self, action: np.ndarray):
         """ This function generates a trajectory based on a MP and then does the usual loop over reset and step"""
         # TODO: Think about sequencing
         # TODO: Reward Function rather here?
         # agent to learn when to release the ball
-        trajectory, velocity = self.get_trajectory(action)
+        mp_params, env_spec_params = self._episode_callback(action)
+        trajectory, velocity = self.get_trajectory(mp_params)
 
         trajectory_length = len(trajectory)
-        actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
-        if isinstance(self.env.observation_space, spaces.Dict): # For goal environments
-            observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space["observation"].shape,
-                                    dtype=self.env.observation_space.dtype)
-        else:
+        if self.verbose >= 2:
+            actions = np.zeros(shape=(trajectory_length,) + self.env.action_space.shape)
             observations = np.zeros(shape=(trajectory_length,) + self.env.observation_space.shape,
                                     dtype=self.env.observation_space.dtype)
         rewards = np.zeros(shape=(trajectory_length,))
         trajectory_return = 0
 
         infos = dict()
 
         for t, pos_vel in enumerate(zip(trajectory, velocity)):
-            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
-            callback_action = self._step_callback(t, action)
-            if callback_action is not None:
-                ac = np.concatenate((callback_action, ac))  # include callback action at first pos of vector
-            actions[t, :] = np.clip(ac, self.env.action_space.low, self.env.action_space.high)
-            obs, rewards[t], done, info = self.env.step(actions[t, :])
-            observations[t, :] = obs["observation"] if isinstance(self.env.observation_space, spaces.Dict) else obs
-            trajectory_return += rewards[t]
+            step_action = self.controller.get_action(pos_vel[0], pos_vel[1], self.current_pos, self.current_vel)
+            step_action = self._step_callback(t, env_spec_params, step_action)  # include possible callback info
+            c_action = np.clip(step_action, self.env.action_space.low, self.env.action_space.high)
+            obs, c_reward, done, info = self.env.step(c_action)
+            if self.verbose >= 2:
+                actions[t, :] = c_action
+                rewards[t] = c_reward
+                observations[t, :] = obs
+            trajectory_return += c_reward
             for k, v in info.items():
                 elems = infos.get(k, [None] * trajectory_length)
                 elems[t] = v
@@ -158,14 +195,14 @@ class BaseMPWrapper(gym.Env, ABC):
             if done:
                 break
         infos.update({k: v[:t + 1] for k, v in infos.items()})
-        infos['trajectory'] = trajectory
-        if self.verbose == 2:
+        if self.verbose >= 2:
+            infos['trajectory'] = trajectory
             infos['step_actions'] = actions[:t + 1]
             infos['step_observations'] = observations[:t + 1]
             infos['step_rewards'] = rewards[:t + 1]
         infos['trajectory_length'] = t + 1
         done = True
-        return self.get_observation_from_step(observations[t]), trajectory_return, done, infos
+        return self.get_observation_from_step(obs), trajectory_return, done, infos
 
     def reset(self):
         return self.get_observation_from_step(self.env.reset())
alr_envs/mp/mp_factory.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+from mp_pytorch.mp.dmp import DMP
+from mp_pytorch.mp.promp import ProMP
+from mp_pytorch.mp.idmp import IDMP
+
+from mp_pytorch.basis_gn.basis_generator import BasisGenerator
+
+ALL_TYPES = ["promp", "dmp", "idmp"]
+
+
+def get_movement_primitive(
+        movement_primitives_type: str, action_dim: int, basis_generator: BasisGenerator, **kwargs
+):
+    movement_primitives_type = movement_primitives_type.lower()
+    if movement_primitives_type == "promp":
+        return ProMP(basis_generator, action_dim, **kwargs)
+    elif movement_primitives_type == "dmp":
+        return DMP(basis_generator, action_dim, **kwargs)
+    elif movement_primitives_type == 'idmp':
+        return IDMP(basis_generator, action_dim, **kwargs)
+    else:
+        raise ValueError(f"Specified movement primitive type {movement_primitives_type} not supported, "
+                         f"please choose one of {ALL_TYPES}.")
alr_envs/mp/phase_generator_factory.py (new file, 20 lines)
@@ -0,0 +1,20 @@
+from mp_pytorch import LinearPhaseGenerator, ExpDecayPhaseGenerator
+from mp_pytorch.phase_gn.rhythmic_phase_generator import RhythmicPhaseGenerator
+from mp_pytorch.phase_gn.smooth_phase_generator import SmoothPhaseGenerator
+
+ALL_TYPES = ["linear", "exp", "rhythmic", "smooth"]
+
+
+def get_phase_generator(phase_generator_type, **kwargs):
+    phase_generator_type = phase_generator_type.lower()
+    if phase_generator_type == "linear":
+        return LinearPhaseGenerator(**kwargs)
+    elif phase_generator_type == "exp":
+        return ExpDecayPhaseGenerator(**kwargs)
+    elif phase_generator_type == "rhythmic":
+        return RhythmicPhaseGenerator(**kwargs)
+    elif phase_generator_type == "smooth":
+        return SmoothPhaseGenerator(**kwargs)
+    else:
+        raise ValueError(f"Specified phase generator type {phase_generator_type} not supported, "
+                         f"please choose one of {ALL_TYPES}.")
@@ -1,13 +1,20 @@
 import warnings
-from typing import Iterable, Type, Union
+from typing import Iterable, Type, Union, Mapping, MutableMapping
 
 import gym
 import numpy as np
 from gym.envs.registration import EnvSpec
 
-from mp_env_api import MPEnvWrapper
 from mp_env_api.mp_wrappers.dmp_wrapper import DmpWrapper
 from mp_env_api.mp_wrappers.promp_wrapper import ProMPWrapper
+from mp_pytorch import MPInterface
+
+from alr_envs.mp.basis_generator_factory import get_basis_generator
+from alr_envs.mp.controllers.base_controller import BaseController
+from alr_envs.mp.controllers.controller_factory import get_controller
+from alr_envs.mp.mp_factory import get_movement_primitive
+from alr_envs.mp.episodic_wrapper import EpisodicWrapper
+from alr_envs.mp.phase_generator_factory import get_phase_generator
 
 
 def make_rank(env_id: str, seed: int, rank: int = 0, return_callable=True, **kwargs):
@@ -92,7 +99,8 @@ def make(env_id: str, seed, **kwargs):
     return env
 
 
-def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1, **kwargs):
+def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], mp: MPInterface, controller: BaseController,
+                      ep_wrapper_kwargs: Mapping, seed=1, **kwargs):
     """
     Helper function for creating a wrapped gym environment using MPs.
     It adds all provided wrappers to the specified environment and verifies at least one MPEnvWrapper is
@@ -108,14 +116,102 @@ def _make_wrapped_env(env_id: str, wrappers: Iterable[Type[gym.Wrapper]], seed=1
     """
     # _env = gym.make(env_id)
     _env = make(env_id, seed, **kwargs)
+    has_episodic_wrapper = False
-    assert any(issubclass(w, MPEnvWrapper) for w in wrappers), \
-        "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
     for w in wrappers:
-        _env = w(_env)
+        # only wrap the environment if not EpisodicWrapper, e.g. for vision
+        if not issubclass(w, EpisodicWrapper):
+            _env = w(_env)
+        else:  # if EpisodicWrapper, use specific constructor
+            has_episodic_wrapper = True
+            _env = w(env=_env, mp=mp, controller=controller, **ep_wrapper_kwargs)
+    assert has_episodic_wrapper, \
+        "At least one MPEnvWrapper is required in order to leverage motion primitive environments."
     return _env
 
+
+def make_mp_from_kwargs(
+        env_id: str, wrappers: Iterable, ep_wrapper_kwargs: MutableMapping, mp_kwargs: MutableMapping,
+        controller_kwargs: MutableMapping, phase_kwargs: MutableMapping, basis_kwargs: MutableMapping, seed=1,
+        sequenced=False, **kwargs
+):
+    """
+    This can also be used standalone for manually building a custom DMP environment.
+    Args:
+        ep_wrapper_kwargs:
+        basis_kwargs:
+        phase_kwargs:
+        controller_kwargs:
+        env_id: base_env_name,
+        wrappers: list of wrappers (at least an EpisodicWrapper),
+        seed: seed of environment
+        sequenced: When true, this allows to sequence multiple ProMPs by specifying the duration of each sub-trajectory,
+                   this behavior is much closer to step based learning.
+        mp_kwargs: dict of at least {num_dof: int, num_basis: int} for DMP
+
+    Returns: DMP wrapped gym env
+
+    """
+    _verify_time_limit(mp_kwargs.get("duration", None), kwargs.get("time_limit", None))
+    dummy_env = make(env_id, seed)
+    if ep_wrapper_kwargs.get('duration', None) is None:
+        ep_wrapper_kwargs['duration'] = dummy_env.spec.max_episode_steps*dummy_env.dt
+    if phase_kwargs.get('tau', None) is None:
+        phase_kwargs['tau'] = ep_wrapper_kwargs['duration']
+    action_dim = mp_kwargs.pop('num_dof', None)
+    action_dim = action_dim if action_dim is not None else np.prod(dummy_env.action_space.shape).item()
+    phase_gen = get_phase_generator(**phase_kwargs)
+    basis_gen = get_basis_generator(phase_generator=phase_gen, **basis_kwargs)
+    controller = get_controller(**controller_kwargs)
+    mp = get_movement_primitive(action_dim=action_dim, basis_generator=basis_gen, **mp_kwargs)
+    _env = _make_wrapped_env(env_id=env_id, wrappers=wrappers, mp=mp, controller=controller,
+                             ep_wrapper_kwargs=ep_wrapper_kwargs, seed=seed, **kwargs)
+    return _env
+
+
+def make_mp_env_helper(**kwargs):
+    """
+    Helper function for registering a DMP gym environments.
+    Args:
+        **kwargs: expects at least the following:
+        {
+        "name": base environment name.
+        "wrappers": list of wrappers (at least an EpisodicWrapper is required),
+        "movement_primitives_kwargs": {
+            "movement_primitives_type": type_of_your_movement_primitive,
+            non default arguments for the movement primitive instance
+            ...
+            }
+        "controller_kwargs": {
+            "controller_type": type_of_your_controller,
+            non default arguments for the controller instance
+            ...
+            },
+        "basis_generator_kwargs": {
+            "basis_generator_type": type_of_your_basis_generator,
+            non default arguments for the basis generator instance
+            ...
+            },
+        "phase_generator_kwargs": {
+            "phase_generator_type": type_of_your_phase_generator,
+            non default arguments for the phase generator instance
+            ...
+            },
+        }
+
+    Returns: MP wrapped gym env
+
+    """
+    seed = kwargs.pop("seed", None)
+    wrappers = kwargs.pop("wrappers")
+
+    mp_kwargs = kwargs.pop("movement_primitives_kwargs")
+    ep_wrapper_kwargs = kwargs.pop('ep_wrapper_kwargs')
+    contr_kwargs = kwargs.pop("controller_kwargs")
+    phase_kwargs = kwargs.pop("phase_generator_kwargs")
+    basis_kwargs = kwargs.pop("basis_generator_kwargs")
+
+    return make_mp_from_kwargs(env_id=kwargs.pop("name"), wrappers=wrappers, ep_wrapper_kwargs=ep_wrapper_kwargs,
+                               mp_kwargs=mp_kwargs, controller_kwargs=contr_kwargs, phase_kwargs=phase_kwargs,
+                               basis_kwargs=basis_kwargs, **kwargs, seed=seed)
+
+
 def make_dmp_env(env_id: str, wrappers: Iterable, seed=1, mp_kwargs={}, **kwargs):
     """
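A hedged sketch of calling the new helper directly, mirroring the kwargs the BeerpongProMP registration above passes; the NewMPWrapper import path is an assumption based on the beerpong package __init__ shown earlier:

from alr_envs.alr.mujoco.beerpong import NewMPWrapper  # assumed import path
from alr_envs.utils.make_env_helpers import make_mp_env_helper

env = make_mp_env_helper(
    name="alr_envs:ALRBeerPong-v0",
    wrappers=[NewMPWrapper],
    seed=1,
    ep_wrapper_kwargs={"weight_scale": 1},
    movement_primitives_kwargs={"movement_primitives_type": "promp", "num_dof": 7},
    phase_generator_kwargs={"phase_generator_type": "linear", "delay": 0, "tau": 0.8, "learn_tau": True, "learn_delay": False},
    controller_kwargs={"controller_type": "motor"},
    basis_generator_kwargs={"basis_generator_type": "zero_rbf", "num_basis": 2, "num_basis_zero_start": 2},
)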
policies.py (deleted, 129 lines)
@@ -1,129 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-
-class BaseController:
-    def __init__(self, env, **kwargs):
-        self.env = env
-
-    def get_action(self, des_pos, des_vel):
-        raise NotImplementedError
-
-
-class PosController(BaseController):
-    """
-    A Position Controller. The controller calculates a response only based on the desired position.
-    """
-    def get_action(self, des_pos, des_vel):
-        return des_pos
-
-
-class VelController(BaseController):
-    """
-    A Velocity Controller. The controller calculates a response only based on the desired velocity.
-    """
-    def get_action(self, des_pos, des_vel):
-        return des_vel
-
-
-class PDController(BaseController):
-    """
-    A PD-Controller. Using position and velocity information from a provided environment,
-    the controller calculates a response based on the desired position and velocity
-
-    :param env: A position environment
-    :param p_gains: Factors for the proportional gains
-    :param d_gains: Factors for the differential gains
-    """
-
-    def __init__(self,
-                 env,
-                 p_gains: Union[float, Tuple],
-                 d_gains: Union[float, Tuple]):
-        self.p_gains = p_gains
-        self.d_gains = d_gains
-        super(PDController, self).__init__(env)
-
-    def get_action(self, des_pos, des_vel):
-        cur_pos = self.env.current_pos
-        cur_vel = self.env.current_vel
-        assert des_pos.shape == cur_pos.shape, \
-            f"Mismatch in dimension between desired position {des_pos.shape} and current position {cur_pos.shape}"
-        assert des_vel.shape == cur_vel.shape, \
-            f"Mismatch in dimension between desired velocity {des_vel.shape} and current velocity {cur_vel.shape}"
-        trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
-        return trq
-
-
-class MetaWorldController(BaseController):
-    """
-    A Metaworld Controller. Using position and velocity information from a provided environment,
-    the controller calculates a response based on the desired position and velocity.
-    Unlike the other Controllers, this is a special controller for MetaWorld environments.
-    They use a position delta for the xyz coordinates and a raw position for the gripper opening.
-
-    :param env: A position environment
-    """
-
-    def __init__(self,
-                 env
-                 ):
-        super(MetaWorldController, self).__init__(env)
-
-    def get_action(self, des_pos, des_vel):
-        gripper_pos = des_pos[-1]
-
-        cur_pos = self.env.current_pos[:-1]
-        xyz_pos = des_pos[:-1]
-
-        assert xyz_pos.shape == cur_pos.shape, \
-            f"Mismatch in dimension between desired position {xyz_pos.shape} and current position {cur_pos.shape}"
-        trq = np.hstack([(xyz_pos - cur_pos), gripper_pos])
-        return trq
-
-
-#TODO: Do we need this class?
-class PDControllerExtend(BaseController):
-    """
-    A PD-Controller. Using position and velocity information from a provided positional environment,
-    the controller calculates a response based on the desired position and velocity
-
-    :param env: A position environment
-    :param p_gains: Factors for the proportional gains
-    :param d_gains: Factors for the differential gains
-    """
-
-    def __init__(self,
-                 env,
-                 p_gains: Union[float, Tuple],
-                 d_gains: Union[float, Tuple]):
-
-        self.p_gains = p_gains
-        self.d_gains = d_gains
-        super(PDControllerExtend, self).__init__(env)
-
-    def get_action(self, des_pos, des_vel):
-        cur_pos = self.env.current_pos
-        cur_vel = self.env.current_vel
-        if len(des_pos) != len(cur_pos):
-            des_pos = self.env.extend_des_pos(des_pos)
-        if len(des_vel) != len(cur_vel):
-            des_vel = self.env.extend_des_vel(des_vel)
-        trq = self.p_gains * (des_pos - cur_pos) + self.d_gains * (des_vel - cur_vel)
-        return trq
-
-
-def get_policy_class(policy_type, env, **kwargs):
-    if policy_type == "motor":
-        return PDController(env, **kwargs)
-    elif policy_type == "velocity":
-        return VelController(env)
-    elif policy_type == "position":
-        return PosController(env)
-    elif policy_type == "metaworld":
-        return MetaWorldController(env)
-    else:
-        raise ValueError(f"Invalid controller type {policy_type} provided. Only 'motor', 'velocity', 'position' "
-                         f"and 'metaworld are currently supported controllers.")