# fancy_gym/fancy_gym/envs/mujoco/beerpong/mp_wrapper.py

from typing import Union, Tuple

import numpy as np

from fancy_gym.black_box.raw_interface_wrapper import RawInterfaceWrapper


class MPWrapper(RawInterfaceWrapper):
    mp_config = {
        'ProMP': {
            'phase_generator_kwargs': {
                # Let the MP learn tau, i.e. the trajectory duration, which
                # here also determines the ball release timing.
                'learn_tau': True
            },
            'controller_kwargs': {
                # PD controller gains for the seven arm joints.
                'p_gains': np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
                'd_gains': np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
            },
            'basis_generator_kwargs': {
                'num_basis': 2,
                'num_basis_zero_start': 2,
            },
        },
        'DMP': {},
        'ProDMP': {},
    }

    @property
    def context_mask(self) -> np.ndarray:
        # Only the cup's xy position is exposed to the policy as context;
        # all other observation entries are masked out.
        return np.hstack([
            [False] * 7,  # cos of joint angles
            [False] * 7,  # sin of joint angles
            [False] * 7,  # joint velocities
            [False] * 3,  # cup_goal_diff_final
            [False] * 3,  # cup_goal_diff_top
            [True] * 2,   # xy position of cup
            # [False]  # env steps
        ])

    @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        # Current positions of the seven arm joints.
        return self.data.qpos[0:7].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        # Current velocities of the seven arm joints.
        return self.data.qvel[0:7].copy()

    # TODO: Fix this
    def episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None], bool]:
        if mp.learn_tau:
            # The first action entry is the learned tau (trajectory duration
            # in seconds); convert it to a step index for the ball release.
            self.release_step = action[0] / self.dt  # tau value
        return action, None, True

    def set_context(self, context):
        # The context is the cup's xy position; z is fixed, so only the
        # cup table's xy location varies between episodes.
        xyz = np.zeros(3)
        xyz[:2] = context
        xyz[-1] = 0.840
        self.model.body_pos[self.cup_table_id] = xyz
        return self.get_observation_from_step(self.get_obs())
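
# Illustration (hedged, values made up): passing context np.array([0.2, -1.8])
# to MPWrapper.set_context places the cup table body at [0.2, -1.8, 0.840]
# before the rollout, so the policy is conditioned on the cup's xy location.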


class MPWrapper_FixedRelease(MPWrapper):
    # Same as MPWrapper, but tau (and with it the release timing) is fixed
    # to 0.62 s instead of being learned.
    mp_config = {
        'ProMP': {
            'phase_generator_kwargs': {
                'tau': 0.62,
            },
            'controller_kwargs': {
                'p_gains': np.array([1.5, 5, 2.55, 3, 2., 2, 1.25]),
                'd_gains': np.array([0.02333333, 0.1, 0.0625, 0.08, 0.03, 0.03, 0.0125]),
            },
            'basis_generator_kwargs': {
                'num_basis': 2,
                'num_basis_zero_start': 2,
            },
        },
        'DMP': {},
        'ProDMP': {},
    }
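

# --- Usage sketch (not part of the upstream file; a minimal, hedged example) ---
# Assumes fancy_gym registers the ProMP variant of this wrapper under an id
# like 'fancy_ProMP/BeerPong-v0'; the exact id may differ between versions.
if __name__ == "__main__":
    import gymnasium as gym
    import fancy_gym  # noqa: F401  # import side effect registers the envs

    env = gym.make('fancy_ProMP/BeerPong-v0')
    obs, info = env.reset(seed=0)
    # With a ProMP wrapper, a single action is the full parameter vector of
    # the movement primitive (basis weights, plus tau when learn_tau=True),
    # and one step() call rolls out an entire episode.
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    print(reward)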