fancy_gym/alr_envs/alr/mujoco/beerpong/new_mp_wrapper.py

from typing import Tuple, Union

import numpy as np

from alr_envs.mp.episodic_wrapper import EpisodicWrapper


class NewMPWrapper(EpisodicWrapper):
    # def __init__(self, replanning_model):
    #     self.replanning_model = replanning_model

    @property
    def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
        # Joint positions of the 7-DoF arm; copied so the simulator
        # state cannot be mutated through the returned array.
        return self.env.sim.data.qpos[0:7].copy()

    @property
    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
        # Joint velocities of the 7-DoF arm.
        return self.env.sim.data.qvel[0:7].copy()
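    # The two properties above are what the wrapper's tracking controller
    # reads each step. A minimal sketch of a PD-style step, assuming
    # hypothetical desired-trajectory values `des_pos`/`des_vel` and gains
    # `p_gains`/`d_gains` (none of these names are defined in this file):
    #
    #     trq = p_gains * (des_pos - self.current_pos) \
    #         + d_gains * (des_vel - self.current_vel)
    #     obs, reward, done, info = self.env.step(trq)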
    def set_active_obs(self):
        # Boolean mask over the full observation vector: only the xy
        # position of the cup is exposed as context to the movement
        # primitive; everything else is hidden.
        return np.hstack([
            [False] * 7,  # cos of joint angles
            [False] * 7,  # sin of joint angles
            [False] * 7,  # joint velocities
            [False] * 3,  # cup_goal_diff_final
            [False] * 3,  # cup_goal_diff_top
            [True] * 2,   # xy position of cup
            [False]       # env steps
        ])
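    # A minimal sketch of how such a boolean mask is typically applied,
    # assuming a full observation vector `obs` with the 30 entries listed
    # above (the variable names here are illustrative only):
    #
    #     mask = self.set_active_obs()
    #     context = obs[mask]  # -> the 2-D xy position of the cup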
    def do_replanning(self, pos, vel, s, a, t, last_replan_step):
        # Replanning is disabled for this task; the primitive is executed
        # open loop for the full episode.
        return False
        # const = np.arange(0, 1000, 10)
        # return bool(self.replanning_model(s))
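    # A sketch of what the commented-out lines hint at: replanning at a
    # fixed interval of 10 environment steps (an assumption read from the
    # `np.arange(0, 1000, 10)` line above, not behaviour of this class):
    #
    #     def do_replanning(self, pos, vel, s, a, t, last_replan_step):
    #         return t - last_replan_step >= 10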
    def _episode_callback(self, action: np.ndarray) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
        # When the primitive learns its own duration, the first action
        # entry is the trajectory duration tau (in seconds); convert it
        # to the simulation step at which the ball is released.
        if self.mp.learn_tau:
            self.env.env.release_step = action[0] / self.env.dt  # tau value
        return action, None
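    # Worked example of the conversion above (the dt value is illustrative;
    # the real control timestep comes from the wrapped environment):
    #
    #     tau = 0.5               # learned duration in seconds, action[0]
    #     dt = 0.002              # simulation timestep
    #     release_step = tau / dt  # -> 250.0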
    def set_context(self, context):
        # Place the cup at the given xy context position; the z coordinate
        # is the fixed table height.
        xyz = np.zeros(3)
        xyz[:2] = context
        xyz[-1] = 0.840
        self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
        return self.get_observation_from_step(self.env.env._get_obs())
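# A minimal usage sketch, assuming an already constructed BeerPong
# environment wrapped in this class (`make_env` and the context values
# below are placeholders, not part of this module):
#
#     env = make_env()  # returns a NewMPWrapper around the BeerPong env
#     obs = env.set_context(np.array([0.1, -0.3]))  # cup xy on the table
#     # ... roll out the movement primitive from `obs` ...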