diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py deleted file mode 100644 index a481d6f..0000000 --- a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import Union, Tuple - -import numpy as np - -from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper - - -class MPWrapper(RawInterfaceWrapper): - - @property - def context_mask(self): - return np.hstack([ - [False] * 111, # ant has 111 dimensional observation space !! - [True] # goal height - ]) - - @property - def current_pos(self) -> Union[float, int, np.ndarray]: - return self.env.sim.data.qpos[7:15].copy() - - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return self.env.sim.data.qvel[6:14].copy() diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py b/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/alr_reward_fct.py b/alr_envs/alr/mujoco/ball_in_a_cup/alr_reward_fct.py deleted file mode 100644 index c96dd07..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/alr_reward_fct.py +++ /dev/null @@ -1,21 +0,0 @@ -class AlrReward: - """ - A base class for non-Markovian reward functions which may need trajectory information to calculate an episodic - reward. Call the methods in reset() and step() of the environment. - """ - - # methods to override: - # ---------------------------- - def reset(self, *args, **kwargs): - """ - Reset the reward function, empty state buffers before an episode, set contexts that influence reward, etc. - """ - raise NotImplementedError - - def compute_reward(self, *args, **kwargs): - """ - - Returns: Useful things to return are reward values, success flags or crash flags - - """ - raise NotImplementedError diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml b/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml deleted file mode 100644 index 9229ad5..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml +++ /dev/null @@ -1,361 +0,0 @@ - - - diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py deleted file mode 100644 index 7b286bc..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py +++ /dev/null @@ -1,195 +0,0 @@ -from gym import utils -import os -import numpy as np -from gym.envs.mujoco import MujocoEnv - - -class ALRBallInACupEnv(MujocoEnv, utils.EzPickle): - def __init__( - self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False, - reward_type: str = None, context: np.ndarray = None - ): - utils.EzPickle.__init__(**locals()) - self._steps = 0 - - self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml") - - self._q_pos = [] - self._q_vel = [] - # self.weight_matrix_scale = 50 - self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) - - self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) - self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) - - self.context = context - - alr_mujoco_env.AlrMujocoEnv.__init__(self, self.xml_path, apply_gravity_comp=apply_gravity_comp, - n_substeps=n_substeps) - self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) - self._start_vel = np.zeros(7) - - self.simplified = simplified - - self.sim_time = 8 # seconds - self.sim_steps = 
int(self.sim_time / self.dt) - if reward_type == "no_context": - from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward - reward_function = BallInACupReward - elif reward_type == "contextual_goal": - from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward - reward_function = BallInACupReward - else: - raise ValueError("Unknown reward type: {}".format(reward_type)) - self.reward_function = reward_function(self.sim_steps) - - @property - def start_pos(self): - if self.simplified: - return self._start_pos[1::2] - else: - return self._start_pos - - @property - def start_vel(self): - if self.simplified: - return self._start_vel[1::2] - else: - return self._start_vel - - @property - def current_pos(self): - return self.sim.data.qpos[0:7].copy() - - @property - def current_vel(self): - return self.sim.data.qvel[0:7].copy() - - def reset(self): - self.reward_function.reset(None) - return super().reset() - - def reset_model(self): - init_pos_all = self.init_qpos.copy() - init_pos_robot = self._start_pos - init_vel = np.zeros_like(init_pos_all) - - self._steps = 0 - self._q_pos = [] - self._q_vel = [] - - start_pos = init_pos_all - start_pos[0:7] = init_pos_robot - - self.set_state(start_pos, init_vel) - - return self._get_obs() - - def step(self, a): - reward_dist = 0.0 - angular_vel = 0.0 - reward_ctrl = - np.square(a).sum() - - crash = self.do_simulation(a) - # joint_cons_viol = self.check_traj_in_joint_limits() - - self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy()) - self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy()) - - ob = self._get_obs() - - if not crash: - reward, success, is_collided = self.reward_function.compute_reward(a, self) - done = success or self._steps == self.sim_steps - 1 or is_collided - self._steps += 1 - else: - reward = -2000 - success = False - is_collided = False - done = True - return ob, reward, done, dict(reward_dist=reward_dist, - reward_ctrl=reward_ctrl, - velocity=angular_vel, - # traj=self._q_pos, - action=a, - q_pos=self.sim.data.qpos[0:7].ravel().copy(), - q_vel=self.sim.data.qvel[0:7].ravel().copy(), - is_success=success, - is_collided=is_collided, sim_crash=crash) - - def check_traj_in_joint_limits(self): - return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) - - # TODO: extend observation space - def _get_obs(self): - theta = self.sim.data.qpos.flat[:7] - return np.concatenate([ - np.cos(theta), - np.sin(theta), - # self.get_body_com("target"), # only return target to make problem harder - [self._steps], - ]) - - # TODO - @property - def active_obs(self): - return np.hstack([ - [False] * 7, # cos - [False] * 7, # sin - # [True] * 2, # x-y coordinates of target distance - [False] # env steps - ]) - - # These functions are for the task with 3 joint actuations - def extend_des_pos(self, des_pos): - des_pos_full = self._start_pos.copy() - des_pos_full[1] = des_pos[0] - des_pos_full[3] = des_pos[1] - des_pos_full[5] = des_pos[2] - return des_pos_full - - def extend_des_vel(self, des_vel): - des_vel_full = self._start_vel.copy() - des_vel_full[1] = des_vel[0] - des_vel_full[3] = des_vel[1] - des_vel_full[5] = des_vel[2] - return des_vel_full - - def render(self, render_mode, **render_kwargs): - if render_mode == "plot_trajectory": - if self._steps == 1: - import matplotlib.pyplot as plt - # plt.ion() - self.fig, self.axs = plt.subplots(3, 1) - - if self._steps <= 1750: - for ax, cp in zip(self.axs, self.current_pos[1::2]): - ax.scatter(self._steps, cp, s=2, 
marker=".") - - # self.fig.show() - - else: - super().render(render_mode, **render_kwargs) - - -if __name__ == "__main__": - env = ALRBallInACupEnv() - ctxt = np.array([-0.20869846, -0.66376693, 1.18088501]) - - env.configure(ctxt) - env.reset() - # env.render() - for i in range(16000): - # test with random actions - ac = 0.001 * env.action_space.sample()[0:7] - # ac = env.start_pos - # ac[0] += np.pi/2 - obs, rew, d, info = env.step(ac) - # env.render() - - print(rew) - - if d: - break - - env.close() diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py deleted file mode 100644 index 81a08f5..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Tuple, Union - -import numpy as np - -from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper - - -class BallInACupMPWrapper(RawInterfaceWrapper): - - @property - def context_mask(self) -> np.ndarray: - # TODO: @Max Filter observations correctly - return np.hstack([ - [False] * 7, # cos - [False] * 7, # sin - # [True] * 2, # x-y coordinates of target distance - [False] # env steps - ]) - - @property - def start_pos(self): - if self.simplified: - return self._start_pos[1::2] - else: - return self._start_pos - - @property - def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: - return self.sim.data.qpos[0:7].copy() - - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return self.sim.data.qvel[0:7].copy() - - @property - def goal_pos(self): - # TODO: @Max I think the default value of returning to the start is reasonable here - raise ValueError("Goal position is not available and has to be learnt based on the environment.") - - @property - def dt(self) -> Union[float, int]: - return self.env.dt diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py deleted file mode 100644 index b4d0e6e..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py +++ /dev/null @@ -1,142 +0,0 @@ -import numpy as np -from alr_envs.alr.mujoco.ball_in_a_cup import alr_reward_fct - - -class BallInACupReward(alr_reward_fct.AlrReward): - def __init__(self, sim_time): - self.sim_time = sim_time - - self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom", - "wrist_pitch_link_convex_decomposition_p1_geom", - "wrist_pitch_link_convex_decomposition_p2_geom", - "wrist_pitch_link_convex_decomposition_p3_geom", - "wrist_yaw_link_convex_decomposition_p1_geom", - "wrist_yaw_link_convex_decomposition_p2_geom", - "forearm_link_convex_decomposition_p1_geom", - "forearm_link_convex_decomposition_p2_geom"] - - self.ball_id = None - self.ball_collision_id = None - self.goal_id = None - self.goal_final_id = None - self.collision_ids = None - - self.ball_traj = None - self.dists = None - self.dists_ctxt = None - self.dists_final = None - self.costs = None - - self.reset(None) - - def reset(self, context): - self.ball_traj = np.zeros(shape=(self.sim_time, 3)) - self.cup_traj = np.zeros(shape=(self.sim_time, 3)) - self.dists = [] - self.dists_ctxt = [] - self.dists_final = [] - self.costs = [] - self.context = context - self.ball_in_cup = False - self.ball_above_threshold = False - self.dist_ctxt = 3 - self.action_costs = [] - self.cup_angles = [] - - def compute_reward(self, action, sim, step): - action_cost = np.sum(np.square(action)) - 
self.action_costs.append(action_cost) - - stop_sim = False - success = False - - self.ball_id = sim.model._body_name2id["ball"] - self.ball_collision_id = sim.model._geom_name2id["ball_geom"] - self.goal_id = sim.model._site_name2id["cup_goal"] - self.goal_final_id = sim.model._site_name2id["cup_goal_final"] - self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects] - - if self.check_collision(sim): - reward = - 1e-3 * action_cost - 1000 - stop_sim = True - return reward, success, stop_sim - - # Compute the current distance from the ball to the inner part of the cup - goal_pos = sim.data.site_xpos[self.goal_id] - ball_pos = sim.data.body_xpos[self.ball_id] - goal_final_pos = sim.data.site_xpos[self.goal_final_id] - self.dists.append(np.linalg.norm(goal_pos - ball_pos)) - self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) - self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context)) - self.ball_traj[step, :] = np.copy(ball_pos) - self.cup_traj[step, :] = np.copy(goal_pos) # ? - cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]]) - self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]), - 1 - 2 * (cup_quat[1] ** 2 + cup_quat[2] ** 2))) - - # Determine the first time when ball is in cup - if not self.ball_in_cup: - ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id) - self.ball_in_cup = ball_in_cup - if ball_in_cup: - dist_to_ctxt = np.linalg.norm(ball_pos - self.context) - self.dist_ctxt = dist_to_ctxt - - if step == self.sim_time - 1: - t_min_dist = np.argmin(self.dists) - angle_min_dist = self.cup_angles[t_min_dist] - cost_angle = (angle_min_dist - np.pi / 2) ** 2 - - min_dist = np.min(self.dists) - dist_final = self.dists_final[-1] - # dist_ctxt = self.dists_ctxt[-1] - - # # max distance between ball and cup and cup height at that time - # ball_to_cup_diff = self.ball_traj[:, 2] - self.cup_traj[:, 2] - # t_max_diff = np.argmax(ball_to_cup_diff) - # t_max_ball_height = np.argmax(self.ball_traj[:, 2]) - # max_ball_height = np.max(self.ball_traj[:, 2]) - - # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt) - cost = 0.5 * min_dist + 0.5 * dist_final + 0.3 * np.minimum(self.dist_ctxt, 3) + 0.01 * cost_angle - reward = np.exp(-2 * cost) - 1e-3 * action_cost - # if max_ball_height < self.context[2] or ball_to_cup_diff[t_max_ball_height] < 0: - # reward -= 1 - - success = dist_final < 0.05 and self.dist_ctxt < 0.05 - else: - reward = - 1e-3 * action_cost - success = False - - return reward, success, stop_sim - - def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt): - if not ball_in_cup: - cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2) - else: - cost = 2 * dist_to_ctxt ** 2 - print('Context Distance:', dist_to_ctxt) - return cost - - def check_ball_in_cup(self, sim, ball_collision_id): - cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"] - for coni in range(0, sim.data.ncon): - con = sim.data.contact[coni] - - collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id - collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id - - if collision or collision_trans: - return True - return False - - def check_collision(self, sim): - for coni in range(0, sim.data.ncon): - con = sim.data.contact[coni] - - collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id - collision_trans = con.geom1 == self.ball_collision_id and 
con.geom2 in self.collision_ids - - if collision or collision_trans: - return True - return False diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py deleted file mode 100644 index 8044eb8..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py +++ /dev/null @@ -1,116 +0,0 @@ -import numpy as np -from alr_envs.alr.mujoco.ball_in_a_cup import alr_reward_fct - - -class BallInACupReward(alr_reward_fct.AlrReward): - def __init__(self, env): - self.env = env - self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below", - "wrist_palm_link_convex_geom", - "wrist_pitch_link_convex_decomposition_p1_geom", - "wrist_pitch_link_convex_decomposition_p2_geom", - "wrist_pitch_link_convex_decomposition_p3_geom", - "wrist_yaw_link_convex_decomposition_p1_geom", - "wrist_yaw_link_convex_decomposition_p2_geom", - "forearm_link_convex_decomposition_p1_geom", - "forearm_link_convex_decomposition_p2_geom"] - - self.ball_id = None - self.ball_collision_id = None - self.goal_id = None - self.goal_final_id = None - self.collision_ids = None - self._is_collided = False - self.collision_penalty = 1000 - - self.ball_traj = None - self.dists = None - self.dists_final = None - self.costs = None - - self.reset(None) - - def reset(self, context): - # self.sim_time = self.env.sim.dtsim_time - self.ball_traj = [] # np.zeros(shape=(self.sim_time, 3)) - self.dists = [] - self.dists_final = [] - self.costs = [] - self.action_costs = [] - self.angle_costs = [] - self.cup_angles = [] - - def compute_reward(self, action): - self.ball_id = self.env.sim.model._body_name2id["ball"] - self.ball_collision_id = self.env.sim.model._geom_name2id["ball_geom"] - self.goal_id = self.env.sim.model._site_name2id["cup_goal"] - self.goal_final_id = self.env.sim.model._site_name2id["cup_goal_final"] - self.collision_ids = [self.env.sim.model._geom_name2id[name] for name in self.collision_objects] - - ball_in_cup = self.check_ball_in_cup(self.env.sim, self.ball_collision_id) - - # Compute the current distance from the ball to the inner part of the cup - goal_pos = self.env.sim.data.site_xpos[self.goal_id] - ball_pos = self.env.sim.data.body_xpos[self.ball_id] - goal_final_pos = self.env.sim.data.site_xpos[self.goal_final_id] - self.dists.append(np.linalg.norm(goal_pos - ball_pos)) - self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) - # self.ball_traj[self.env._steps, :] = ball_pos - self.ball_traj.append(ball_pos) - cup_quat = np.copy(self.env.sim.data.body_xquat[self.env.sim.model._body_name2id["cup"]]) - cup_angle = np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]), - 1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)) - cost_angle = (cup_angle - np.pi / 2) ** 2 - self.angle_costs.append(cost_angle) - self.cup_angles.append(cup_angle) - - action_cost = np.sum(np.square(action)) - self.action_costs.append(action_cost) - - self._is_collided = self.check_collision(self.env.sim) or self.env._check_traj_in_joint_limits() - - if self.env._steps == self.env.ep_length - 1 or self._is_collided: - t_min_dist = np.argmin(self.dists) - angle_min_dist = self.cup_angles[t_min_dist] - # cost_angle = (angle_min_dist - np.pi / 2)**2 - - - # min_dist = self.dists[t_min_dist] - dist_final = self.dists_final[-1] - min_dist_final = np.min(self.dists_final) - - # cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist + - # reward = np.exp(-2 * cost) - 1e-2 * 
action_cost - self.collision_penalty * int(self._is_collided) - # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided) - reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided) - success = dist_final < 0.05 and ball_in_cup and not self._is_collided - crash = self._is_collided - else: - reward = - 1e-3 * action_cost - 1e-4 * cost_angle # TODO: increase action_cost weight - success = False - crash = False - - return reward, success, crash - - def check_ball_in_cup(self, sim, ball_collision_id): - cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"] - for coni in range(0, sim.data.ncon): - con = sim.data.contact[coni] - - collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id - collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id - - if collision or collision_trans: - return True - return False - - def check_collision(self, sim): - for coni in range(0, sim.data.ncon): - con = sim.data.contact[coni] - - collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id - collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids - - if collision or collision_trans: - return True - return False diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py b/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py deleted file mode 100644 index 4abfb6c..0000000 --- a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py +++ /dev/null @@ -1,194 +0,0 @@ -import os - -import gym.envs.mujoco -import gym.envs.mujoco as mujoco_env -import mujoco_py.builder -import numpy as np -from gym import utils - - -class ALRBallInACupPDEnv(mujoco_env.MujocoEnv, utils.EzPickle): - def __init__(self, frame_skip=4, apply_gravity_comp=True, simplified: bool = False, - reward_type: str = None, context: np.ndarray = None): - utils.EzPickle.__init__(**locals()) - self._steps = 0 - - self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml") - - self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) - - self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) - self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) - - self.context = context - self.apply_gravity_comp = apply_gravity_comp - self.simplified = simplified - - self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) - self._start_vel = np.zeros(7) - - self.sim_time = 8 # seconds - self._dt = 0.02 - self.ep_length = 4000 # based on 8 seconds with dt = 0.02 int(self.sim_time / self.dt) - if reward_type == "no_context": - from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward - reward_function = BallInACupReward - elif reward_type == "contextual_goal": - from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward - reward_function = BallInACupReward - else: - raise ValueError("Unknown reward type: {}".format(reward_type)) - self.reward_function = reward_function(self) - - mujoco_env.MujocoEnv.__init__(self, self.xml_path, frame_skip) - - @property - def dt(self): - return self._dt - - # TODO: @Max is this even needed? 
- @property - def start_vel(self): - if self.simplified: - return self._start_vel[1::2] - else: - return self._start_vel - - # def _set_action_space(self): - # if self.simplified: - # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)[1::2] - # else: - # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32) - # low, high = bounds.T - # self.action_space = spaces.Box(low=low, high=high, dtype=np.float32) - # return self.action_space - - def reset(self): - self.reward_function.reset(None) - return super().reset() - - def reset_model(self): - init_pos_all = self.init_qpos.copy() - init_pos_robot = self._start_pos - init_vel = np.zeros_like(init_pos_all) - - self._steps = 0 - self._q_pos = [] - self._q_vel = [] - - start_pos = init_pos_all - start_pos[0:7] = init_pos_robot - - self.set_state(start_pos, init_vel) - - return self._get_obs() - - def step(self, a): - reward_dist = 0.0 - angular_vel = 0.0 - reward_ctrl = - np.square(a).sum() - - # if self.simplified: - # tmp = np.zeros(7) - # tmp[1::2] = a - # a = tmp - - if self.apply_gravity_comp: - a += self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0] - - crash = False - try: - self.do_simulation(a, self.frame_skip) - except mujoco_py.builder.MujocoException: - crash = True - # joint_cons_viol = self.check_traj_in_joint_limits() - - ob = self._get_obs() - - if not crash: - reward, success, is_collided = self.reward_function.compute_reward(a) - done = success or is_collided # self._steps == self.sim_steps - 1 - self._steps += 1 - else: - reward = -2000 - success = False - is_collided = False - done = True - - return ob, reward, done, dict(reward_dist=reward_dist, - reward_ctrl=reward_ctrl, - velocity=angular_vel, - # traj=self._q_pos, - action=a, - q_pos=self.sim.data.qpos[0:7].ravel().copy(), - q_vel=self.sim.data.qvel[0:7].ravel().copy(), - is_success=success, - is_collided=is_collided, sim_crash=crash) - - def check_traj_in_joint_limits(self): - return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) - - # TODO: extend observation space - def _get_obs(self): - theta = self.sim.data.qpos.flat[:7] - return np.concatenate([ - np.cos(theta), - np.sin(theta), - # self.get_body_com("target"), # only return target to make problem harder - [self._steps], - ]) - - # These functions are for the task with 3 joint actuations - def extend_des_pos(self, des_pos): - des_pos_full = self._start_pos.copy() - des_pos_full[1] = des_pos[0] - des_pos_full[3] = des_pos[1] - des_pos_full[5] = des_pos[2] - return des_pos_full - - def extend_des_vel(self, des_vel): - des_vel_full = self._start_vel.copy() - des_vel_full[1] = des_vel[0] - des_vel_full[3] = des_vel[1] - des_vel_full[5] = des_vel[2] - return des_vel_full - - def render(self, render_mode, **render_kwargs): - if render_mode == "plot_trajectory": - if self._steps == 1: - import matplotlib.pyplot as plt - # plt.ion() - self.fig, self.axs = plt.subplots(3, 1) - - if self._steps <= 1750: - for ax, cp in zip(self.axs, self.current_pos[1::2]): - ax.scatter(self._steps, cp, s=2, marker=".") - - # self.fig.show() - - else: - super().render(render_mode, **render_kwargs) - - -if __name__ == "__main__": - env = ALRBallInACupPDEnv(reward_type="no_context", simplified=True) - # env = gym.make("alr_envs:ALRBallInACupPDSimpleDetPMP-v0") - # ctxt = np.array([-0.20869846, -0.66376693, 1.18088501]) - - # env.configure(ctxt) - env.reset() - env.render("human") - for i in range(16000): - # test with random actions - ac = 0.02 * 
env.action_space.sample()[0:7] - # ac = env.start_pos - # ac[0] += np.pi/2 - obs, rew, d, info = env.step(ac) - env.render("human") - - print(rew) - - if d: - break - - env.close() diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py deleted file mode 100644 index e69d4f9..0000000 --- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Union, Tuple - -import numpy as np - -from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper - - -class MPWrapper(RawInterfaceWrapper): - - def get_context_mask(self): - return np.hstack([ - [False] * 7, # cos - [False] * 7, # sin - [False] * 7, # joint velocities - [False] * 3, # cup_goal_diff_final - [False] * 3, # cup_goal_diff_top - [True] * 2, # xy position of cup - [False] # env steps - ]) - - @property - def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: - return self.env.sim.data.qpos[0:7].copy() - - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return self.env.sim.data.qvel[0:7].copy() - - # TODO: Fix this - def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]: - if mp.learn_tau: - self.env.env.release_step = action[0] / self.env.dt # Tau value - return action, None - else: - return action, None - - def set_context(self, context): - xyz = np.zeros(3) - xyz[:2] = context - xyz[-1] = 0.840 - self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz - return self.get_observation_from_step(self.env.env._get_obs()) diff --git a/alr_envs/alr/mujoco/gym_table_tennis/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml deleted file mode 100644 index 7772d14..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml deleted file mode 100644 index a8df915..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml deleted file mode 100644 index 011b95a..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml 
b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml deleted file mode 100644 index ad1ae35..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml deleted file mode 100644 index eb2b347..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml deleted file mode 100644 index 29a21e1..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml +++ /dev/null @@ -1,80 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl deleted file mode 100644 index 133b112..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl deleted file mode 100644 index 047e9df..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl deleted file mode 100644 index 5ff94a2..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl deleted file mode 100644 index c548448..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl deleted file mode 100644 index 495160d..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl deleted file mode 100644 index b4bb322..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl deleted file mode 100644 index 
f05174e..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl deleted file mode 100644 index eb252d9..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl deleted file mode 100644 index c039f0d..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl deleted file mode 100644 index 250acaf..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl deleted file mode 100644 index 993d0f7..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl deleted file mode 100644 index 8448a3f..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl deleted file mode 100644 index b34963d..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl deleted file mode 100644 index e6aa6b6..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl deleted file mode 100644 index 667902e..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl deleted file mode 100644 index ed66bbb..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl and 
/dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl deleted file mode 100644 index aba957d..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl deleted file mode 100644 index 5cca6a9..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl deleted file mode 100644 index 3343e27..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl deleted file mode 100644 index ae505fd..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl deleted file mode 100644 index c36cfec..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl deleted file mode 100644 index dc633c4..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl deleted file mode 100644 index 82d0093..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl deleted file mode 100644 index 7fd5a55..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl deleted file mode 100644 index 76353ae..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl 
b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl deleted file mode 100644 index a0386f6..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl deleted file mode 100644 index c36f88f..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl deleted file mode 100644 index d00cac1..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl deleted file mode 100644 index 34d1d8b..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl deleted file mode 100644 index 13d2f73..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl deleted file mode 100644 index 06e857f..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl deleted file mode 100644 index 48e1bb1..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl deleted file mode 100644 index 0d95239..0000000 Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl and /dev/null differ diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml deleted file mode 100644 index 9abf102..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - - - - - diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml deleted file mode 100644 index dfbc37a..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml +++ /dev/null @@ 
-1,49 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml deleted file mode 100644 index f2432bb..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py deleted file mode 100644 index 9b16ae1..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py +++ /dev/null @@ -1,244 +0,0 @@ -import numpy as np -from gym import spaces -from gym.envs.robotics import robot_env, utils -# import xml.etree.ElementTree as ET -from alr_envs.alr.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis -import glfw -from alr_envs.alr.mujoco.gym_table_tennis.utils.experiment import ball_initialize -from pathlib import Path -import os - - -class TableTennisEnv(robot_env.RobotEnv): - """Class for Table Tennis environment. - """ - def __init__(self, n_substeps=1, - model_path=None, - initial_qpos=None, - initial_ball_state=None, - config=None, - reward_obj=None - ): - """Initializes a new mujoco based Table Tennis environment. - - Args: - model_path (string): path to the environments XML file - initial_qpos (dict): a dictionary of joint names and values that define the initial - n_actions: Number of joints - n_substeps (int): number of substeps the simulation runs on every call to step - scale (double): limit maximum change in position - initial_ball_state: to reset the ball state - """ - # self.config = config.config - if model_path is None: - path_cws = Path.cwd() - print(path_cws) - current_dir = Path(os.path.split(os.path.realpath(__file__))[0]) - table_tennis_env_xml_path = current_dir / "assets"/"table_tennis_env.xml" - model_path = str(table_tennis_env_xml_path) - self.config = config - action_space = True # self.config['trajectory']['args']['action_space'] - time_step = 0.002 # self.config['mujoco_sim_env']['args']["time_step"] - if initial_qpos is None: - initial_qpos = {"wam/base_yaw_joint_right": 1.5, - "wam/shoulder_pitch_joint_right": 1, - "wam/shoulder_yaw_joint_right": 0, - "wam/elbow_pitch_joint_right": 1, - "wam/wrist_yaw_joint_right": 1, - "wam/wrist_pitch_joint_right": 0, - "wam/palm_yaw_joint_right": 0} - # initial_qpos = [1.5, 1, 0, 1, 1, 0, 0] # self.config['robot_config']['args']['initial_qpos'] - - # TODO should read all configuration in config - assert initial_qpos is not None, "Must initialize the initial q position of robot arm" - n_actions = 7 - self.initial_qpos_value = np.array(list(initial_qpos.values())).copy() - # self.initial_qpos_value = np.array(initial_qpos) - # # change time step in .xml file - # tree = ET.parse(model_path) - # root = tree.getroot() - # for option in root.findall('option'): - # option.set("timestep", str(time_step)) - # - # tree.write(model_path) - - super(TableTennisEnv, self).__init__( - model_path=model_path, n_substeps=n_substeps, n_actions=n_actions, - initial_qpos=initial_qpos) - - if action_space: - self.action_space = spaces.Box(low=np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]), - high=np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]), - dtype='float64') - else: 
- self.action_space = spaces.Box(low=np.array([-np.inf] * 7), - high=np.array([-np.inf] * 7), - dtype='float64') - self.scale = None - self.desired_pos = None - self.n_actions = n_actions - self.action = None - self.time_step = time_step - self._dt = time_step - self.paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center') - if reward_obj is None: - self.reward_obj = HierarchicalRewardTableTennis() - else: - self.reward_obj = reward_obj - - if initial_ball_state is not None: - self.initial_ball_state = initial_ball_state - else: - self.initial_ball_state = ball_initialize(random=False) - self.target_ball_pos = self.sim.data.get_site_xpos("target_ball") - self.racket_center_pos = self.sim.data.get_site_xpos("wam/paddle_center") - - def close(self): - if self.viewer is not None: - glfw.destroy_window(self.viewer.window) - # self.viewer.window.close() - self.viewer = None - self._viewers = {} - - # GoalEnv methods - # ---------------------------- - def compute_reward(self, achieved_goal, goal, info): - # reset the reward, if action valid - # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"] - # right_court_contact_detector = self.reward_obj.contact_detection(self, right_court_contact_obj) - # if right_court_contact_detector: - # print("can detect the table ball contact") - self.reward_obj.total_reward = 0 - # Stage 1 Hitting - self.reward_obj.hitting(self) - # if not hitted, return the highest reward - if not self.reward_obj.goal_achievement: - # return self.reward_obj.highest_reward - return self.reward_obj.total_reward - # # Stage 2 Right Table Contact - # self.reward_obj.right_table_contact(self) - # if not self.reward_obj.goal_achievement: - # return self.reward_obj.highest_reward - # # Stage 2 Net Contact - # self.reward_obj.net_contact(self) - # if not self.reward_obj.goal_achievement: - # return self.reward_obj.highest_reward - # Stage 3 Opponent court Contact - # self.reward_obj.landing_on_opponent_court(self) - # if not self.reward_obj.goal_achievement: - # print("self.reward_obj.highest_reward: ", self.reward_obj.highest_reward) - # TODO - self.reward_obj.target_achievement(self) - # return self.reward_obj.highest_reward - return self.reward_obj.total_reward - - def _reset_sim(self): - self.sim.set_state(self.initial_state) - [initial_x, initial_y, initial_z, v_x, v_y, v_z] = self.initial_ball_state - self.sim.data.set_joint_qpos('tar:x', initial_x) - self.sim.data.set_joint_qpos('tar:y', initial_y) - self.sim.data.set_joint_qpos('tar:z', initial_z) - self.energy_corrected = True - self.give_reflection_reward = False - - # velocity is positive direction - self.sim.data.set_joint_qvel('tar:x', v_x) - self.sim.data.set_joint_qvel('tar:y', v_y) - self.sim.data.set_joint_qvel('tar:z', v_z) - - # Apply gravity compensation - if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]: - self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7] - self.sim.forward() - return True - - def _env_setup(self, initial_qpos): - for name, value in initial_qpos.items(): - self.sim.data.set_joint_qpos(name, value) - - # Apply gravity compensation - if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]: - self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7] - self.sim.forward() - - # Get the target position - self.initial_paddle_center_xpos = self.sim.data.get_site_xpos('wam/paddle_center').copy() - self.initial_paddle_center_vel = None # self.sim.get_site_ - - def _sample_goal(self): - goal = self.initial_paddle_center_xpos[:3] 
+ self.np_random.uniform(-0.2, 0.2, size=3) - return goal.copy() - - def _get_obs(self): - - # positions of racket center - paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center') - ball_pos = self.sim.data.get_site_xpos("target_ball") - - dt = self.sim.nsubsteps * self.sim.model.opt.timestep - paddle_center_velp = self.sim.data.get_site_xvelp('wam/paddle_center') * dt - robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) - - wrist_state = robot_qpos[-3:] - wrist_vel = robot_qvel[-3:] * dt # change to a scalar if the gripper is made symmetric - - # achieved_goal = paddle_body_EE_pos - obs = np.concatenate([ - paddle_center_pos, paddle_center_velp, wrist_state, wrist_vel - ]) - - out_dict = { - 'observation': obs.copy(), - 'achieved_goal': paddle_center_pos.copy(), - 'desired_goal': self.goal.copy(), - 'q_pos': self.sim.data.qpos[:].copy(), - "ball_pos": ball_pos.copy(), - # "hitting_flag": self.reward_obj.hitting_flag - } - - return out_dict - - def _step_callback(self): - pass - - def _set_action(self, action): - # Apply gravity compensation - if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]: - self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7] - # print("set action process running") - assert action.shape == (self.n_actions,) - self.action = action.copy() # ensure that we don't change the action outside of this scope - pos_ctrl = self.action[:] # limit maximum change in position - pos_ctrl = np.clip(pos_ctrl, self.action_space.low, self.action_space.high) - - # get desired trajectory - self.sim.data.qpos[:7] = pos_ctrl - self.sim.forward() - self.desired_pos = self.sim.data.get_site_xpos('wam/paddle_center').copy() - - self.sim.data.ctrl[:] = pos_ctrl - - def _is_success(self, achieved_goal, desired_goal): - pass - - -if __name__ == '__main__': - render_mode = "human" # "human" or "partial" or "final" - env = TableTennisEnv() - env.reset() - # env.render(mode=render_mode) - - for i in range(500): - # objective.load_result("/tmp/cma") - # test with random actions - ac = env.action_space.sample() - # ac[0] += np.pi/2 - obs, rew, d, info = env.step(ac) - env.render(mode=render_mode) - - print(rew) - - if d: - break - - env.close() diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py deleted file mode 100644 index a106d68..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py +++ /dev/null @@ -1,83 +0,0 @@ -import numpy as np -from gym.utils import seeding -from alr_envs.alr.mujoco.gym_table_tennis.utils.util import read_yaml, read_json -from pathlib import Path - - -def ball_initialize(random=False, scale=False, context_range=None, scale_value=None): - if random: - if scale: - # if scale_value is None: - scale_value = context_scale_initialize(context_range) - v_x, v_y, v_z = [2.5, 2, 0.5] * scale_value - dx = 1 - dy = 0 - dz = 0.05 - else: - seed = None - np_random, seed = seeding.np_random(seed) - dx = np_random.uniform(-0.1, 0.1) - dy = np_random.uniform(-0.1, 0.1) - dz = np_random.uniform(-0.1, 0.1) - - v_x = np_random.uniform(1.7, 1.8) - v_y = np_random.uniform(0.7, 0.8) - v_z = np_random.uniform(0.1, 0.2) - # print(dx, dy, dz, v_x, v_y, v_z) - # else: - # dx = -0.1 - # dy = 0.05 - # dz = 0.05 - # v_x = 1.5 - # v_y = 0.7 - # v_z = 0.06 - # initial_x = -0.6 + dx - # initial_y = -0.3 
+ dy - # initial_z = 0.8 + dz - else: - if scale: - v_x, v_y, v_z = [2.5, 2, 0.5] * scale_value - else: - v_x = 2.5 - v_y = 2 - v_z = 0.5 - dx = 1 - dy = 0 - dz = 0.05 - - initial_x = 0 + dx - initial_y = -0.2 + dy - initial_z = 0.3 + dz - # print("initial ball state: ", initial_x, initial_y, initial_z, v_x, v_y, v_z) - initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z]) - return initial_ball_state - - -def context_scale_initialize(range): - """ - - Returns: - - """ - low, high = range - scale = np.random.uniform(low, high, 1) - return scale - - -def config_handle_generation(config_file_path): - """Generate config handle for multiprocessing - - Args: - config_file_path: - - Returns: - - """ - cfg_fname = Path(config_file_path) - # .json and .yml file - if cfg_fname.suffix == ".json": - config = read_json(cfg_fname) - elif cfg_fname.suffix == ".yml": - config = read_yaml(cfg_fname) - - return config diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py deleted file mode 100644 index fe69104..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py +++ /dev/null @@ -1,402 +0,0 @@ -import numpy as np -import logging - - -class HierarchicalRewardTableTennis(object): - """Class for hierarchical reward function for table tennis experiment. - - Return Highest Reward. - Reward = 0 - Step 1: Action Valid. Upper Bound 0 - [-∞, 0] - Reward += -1 * |hit_duration - hit_duration_threshold| * |hit_duration < hit_duration_threshold| * 10 - Step 2: Hitting. Upper Bound 2 - if hitting: - [0, 2] - Reward = 2 * (1 - tanh(|shortest_hitting_dist|)) - if not hitting: - [0, 0.2] - Reward = 2 * (1 - tanh(|shortest_hitting_dist|)) - Step 3: Target Point Achievement. 
Upper Bound 6 - [0, 4] - if table_contact_detector: - Reward += 1 - Reward += (1 - tanh(|shortest_hitting_dist|)) * 2 - if contact_coordinate[0] < 0: - Reward += 1 - else: - Reward += 0 - elif: - Reward += (1 - tanh(|shortest_hitting_dist|)) - """ - - def __init__(self): - self.reward = None - self.goal_achievement = False - self.total_reward = 0 - self.shortest_hitting_dist = 1000 - self.highest_reward = -1000 - self.lowest_corner_dist = 100 - self.right_court_contact_detector = False - self.table_contact_detector = False - self.floor_contact_detector = False - self.radius = 0.025 - self.min_ball_x_pos = 100 - self.hit_contact_detector = False - self.net_contact_detector = False - self.ratio = 1 - self.lowest_z = 100 - self.target_flag = False - self.dist_target_virtual = 100 - self.ball_z_pos_lowest = 100 - self.hitting_flag = False - self.hitting_time_point = None - self.ctxt_dim = None - self.context_range_bounds = None - # self.ctxt_out_of_range_punishment = None - # self.ctxt_in_side_of_range_punishment = None - # - # def check_where_invalid(self, ctxt, context_range_bounds, set_to_valid_region=False): - # idx_max = [] - # idx_min = [] - # for dim in range(self.ctxt_dim): - # min_dim = context_range_bounds[0][dim] - # max_dim = context_range_bounds[1][dim] - # idx_max_c = np.where(ctxt[:, dim] > max_dim)[0] - # idx_min_c = np.where(ctxt[:, dim] < min_dim)[0] - # if set_to_valid_region: - # if idx_max_c.shape[0] != 0: - # ctxt[idx_max_c, dim] = max_dim - # if idx_min_c.shape[0] != 0: - # ctxt[idx_min_c, dim] = min_dim - # idx_max.append(idx_max_c) - # idx_min.append(idx_min_c) - # return idx_max, idx_min, ctxt - - def check_valid(self, scale, context_range_bounds): - - min_dim = context_range_bounds[0][0] - max_dim = context_range_bounds[1][0] - valid = (scale < max_dim) and (scale > min_dim) - return valid - - @classmethod - def goal_distance(cls, goal_a, goal_b): - assert goal_a.shape == goal_b.shape - return np.linalg.norm(goal_a - goal_b, axis=-1) - - def refresh_highest_reward(self): - if self.total_reward >= self.highest_reward: - self.highest_reward = self.total_reward - - def duration_valid(self): - pass - - def huge_value_unstable(self): - self.total_reward += -10 - self.highest_reward = -1 - - def context_valid(self, context): - valid = self.check_valid(context.copy(), context_range_bounds=self.context_range_bounds) - # when using dirac punishments - if valid: - self.total_reward += 1 # If Action Valid and Context Valid, total_reward = 0 - else: - self.total_reward += 0 - self.refresh_highest_reward() - - - - # If in the ctxt, add 1, otherwise, 0 - - def action_valid(self, durations=None): - """Ensure the execution of the robot movement with parameters which are in a valid domain. - - Time should always be positive, - the joint position of the robot should be a subset of [−π, π]. - if all parameters are valid, the robot gets a zero score, - otherwise it gets a negative score proportional to how much it is beyond the valid parameter domain. - - Returns: - rewards: if valid, reward is equal to 0. 
- if not valid, reward is negative and proportional to the distance beyond the valid parameter domain - """ - assert durations.shape[0] == 2, "durations type should be np.array and the shape should be 2" - # pre_duration = durations[0] - hit_duration = durations[1] - # pre_duration_thres = 0.01 - hit_duration_thres = 1 - # self.goal_achievement = np.all( - # [(pre_duration > pre_duration_thres), (hit_duration > hit_duration_thres), (0.3 < pre_duration < 0.6)]) - self.goal_achievement = (hit_duration > hit_duration_thres) - if self.goal_achievement: - self.total_reward = -1 - self.goal_achievement = True - else: - # self.total_reward += -1 * ((np.abs(pre_duration - pre_duration_thres) * int( - # pre_duration < pre_duration_thres) + np.abs(hit_duration - hit_duration_thres) * int( - # hit_duration < hit_duration_thres)) * 10) - self.total_reward = -1 * ((np.abs(hit_duration - hit_duration_thres) * int( - hit_duration < hit_duration_thres)) * 10) - self.total_reward += -1 - self.goal_achievement = False - self.refresh_highest_reward() - - def motion_penalty(self, action, high_motion_penalty): - """Protects the robot from high acceleration and dangerous movement. - """ - if not high_motion_penalty: - reward_ctrl = - 0.05 * np.square(action).sum() - else: - reward_ctrl = - 0.075 * np.square(action).sum() - self.total_reward += reward_ctrl - self.refresh_highest_reward() - self.goal_achievement = True - - def hitting(self, env): # , target_ball_pos, racket_center_pos, hit_contact_detector=False - """Hitting reward calculation - - If racket successfully hit the ball, the reward +1 - Otherwise calculate the distance between the center of racket and the center of ball, - reward = tanh(r/dist) if dist<1 reward almost 2 , if dist >= 1 reward is between [0, 0.2] - - - Args: - env: - - Returns: - - """ - - hit_contact_obj = ["target_ball", "bat"] - target_ball_pos = env.target_ball_pos - racket_center_pos = env.racket_center_pos - # hit contact detection - # Record the hitting history - self.hitting_flag = False - if not self.hit_contact_detector: - self.hit_contact_detector = self.contact_detection(env, hit_contact_obj) - if self.hit_contact_detector: - print("First time detect hitting") - self.hitting_flag = True - if self.hit_contact_detector: - - # TODO - dist = self.goal_distance(target_ball_pos, racket_center_pos) - if dist < 0: - dist = 0 - # print("goal distance is:", dist) - if dist <= self.shortest_hitting_dist: - self.shortest_hitting_dist = dist - # print("shortest_hitting_dist is:", self.shortest_hitting_dist) - # Keep the shortest hitting distance. 
- dist_reward = 2 * (1 - np.tanh(np.abs(self.shortest_hitting_dist))) - - # TODO sparse - # dist_reward = 2 - - self.total_reward += dist_reward - self.goal_achievement = True - - # if self.hitting_time_point is not None and self.hitting_time_point > 600: - # self.total_reward += 1 - - else: - dist = self.goal_distance(target_ball_pos, racket_center_pos) - if dist <= self.shortest_hitting_dist: - self.shortest_hitting_dist = dist - dist_reward = 1 - np.tanh(self.shortest_hitting_dist) - reward = 0.2 * dist_reward # because it does not hit the ball, so multiply 0.2 - self.total_reward += reward - self.goal_achievement = False - - self.refresh_highest_reward() - - @classmethod - def relu(cls, x): - return np.maximum(0, x) - - # def right_table_contact(self, env): - # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"] - # if env.target_ball_pos[0] >= 0 and env.target_ball_pos[2] >= 0.7: - # # update right court contact detection - # if not self.right_court_contact_detector: - # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj) - # if self.right_court_contact_detector: - # self.contact_x_pos = env.target_ball_pos[0] - # if self.right_court_contact_detector: - # self.total_reward += 1 - norm(0.685, 1).pdf(self.contact_x_pos) # x axis middle of right table - # self.goal_achievement = False - # else: - # self.total_reward += 1 - # self.goal_achievement = True - # # else: - # # self.total_reward += 0 - # # self.goal_achievement = False - # self.refresh_highest_reward() - - # def net_contact(self, env): - # net_contact_obj = ["target_ball", "table_tennis_net"] - # # net_contact_detector = self.contact_detection(env, net_contact_obj) - # # ball_x_pos = env.target_ball_pos[0] - # # if self.min_ball_x_pos >= ball_x_pos: - # # self.min_ball_x_pos = ball_x_pos - # # table_left_edge_x_pos = -1.37 - # # if np.abs(ball_x_pos) <= 0.01: # x threshold of net - # # if self.lowest_z >= env.target_ball_pos[2]: - # # self.lowest_z = env.target_ball_pos[2] - # # # construct a gaussian distribution of z - # # z_reward = 4 - norm(0, 0.1).pdf(self.lowest_z - 0.07625) # maximum 4 - # # self.total_reward += z_reward - # # self.total_reward += 2 - np.minimum(1, self.relu(np.abs(self.min_ball_x_pos))) - # if not self.net_contact_detector: - # self.net_contact_detector = self.contact_detection(env, net_contact_obj) - # if self.net_contact_detector: - # self.total_reward += 0 # very high cost - # self.goal_achievement = False - # else: - # self.total_reward += 1 - # self.goal_achievement = True - # self.refresh_highest_reward() - - # def landing_on_opponent_court(self, env): - # # Very sparse reward - # # don't contact the right side court - # # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"] - # # right_court_contact_detector = self.contact_detection(env, right_court_contact_obj) - # left_court_contact_obj = ["target_ball", "table_tennis_table_left_side"] - # # left_court_contact_detector = self.contact_detection(env, left_court_contact_obj) - # # record the contact history - # # if not self.right_court_contact_detector: - # # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj) - # if not self.table_contact_detector: - # self.table_contact_detector = self.contact_detection(env, left_court_contact_obj) - # - # dist_left_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_up_corner")) - # dist_middle_up_corner = self.goal_distance(env.target_ball_pos, 
env.sim.data.get_site_xpos("middle_up_corner")) - # dist_left_down_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_down_corner")) - # dist_middle_down_corner = self.goal_distance(env.target_ball_pos, - # env.sim.data.get_site_xpos("middle_down_corner")) - # dist_array = np.array( - # [dist_left_up_corner, dist_middle_up_corner, dist_left_down_corner, dist_middle_down_corner]) - # dist_corner = np.amin(dist_array) - # if self.lowest_corner_dist >= dist_corner: - # self.lowest_corner_dist = dist_corner - # - # right_contact_cost = 1 - # left_contact_reward = 2 - # dist_left_table_reward = (2 - np.tanh(self.lowest_corner_dist)) - # # TODO Try multi dimensional gaussian distribution - # # contact only the left side court - # if self.right_court_contact_detector: - # self.total_reward += 0 - # self.goal_achievement = False - # if self.table_contact_detector: - # self.total_reward += left_contact_reward - # self.goal_achievement = False - # else: - # self.total_reward += dist_left_table_reward - # self.goal_achievement = False - # else: - # self.total_reward += right_contact_cost - # if self.table_contact_detector: - # self.total_reward += left_contact_reward - # self.goal_achievement = True - # else: - # self.total_reward += dist_left_table_reward - # self.goal_achievement = False - # self.refresh_highest_reward() - # # if self.left_court_contact_detector and not self.right_court_contact_detector: - # # self.total_reward += self.ratio * left_contact_reward - # # print("only left court reward return!!!!!!!!!") - # # print("contact only left court!!!!!!") - # # self.goal_achievement = True - # # # no contact with table - # # elif not self.right_court_contact_detector and not self.left_court_contact_detector: - # # self.total_reward += 0 + self.ratio * dist_left_table_reward - # # self.goal_achievement = False - # # # contact both side - # # elif self.right_court_contact_detector and self.left_court_contact_detector: - # # self.total_reward += self.ratio * (left_contact_reward - right_contact_cost) # cost of contact of right court - # # self.goal_achievement = False - # # # contact only the right side court - # # elif self.right_court_contact_detector and not self.left_court_contact_detector: - # # self.total_reward += 0 + self.ratio * ( - # # dist_left_table_reward - right_contact_cost) # cost of contact of right court - # # self.goal_achievement = False - - def target_achievement(self, env): - target_coordinate = np.array([-0.5, -0.5]) - # net_contact_obj = ["target_ball", "table_tennis_net"] - table_contact_obj = ["target_ball", "table_tennis_table"] - floor_contact_obj = ["target_ball", "floor"] - - if 0.78 < env.target_ball_pos[2] < 0.8: - dist_target_virtual = np.linalg.norm(env.target_ball_pos[:2] - target_coordinate) - if self.dist_target_virtual > dist_target_virtual: - self.dist_target_virtual = dist_target_virtual - if -0.07 < env.target_ball_pos[0] < 0.07 and env.sim.data.get_joint_qvel('tar:x') < 0: - if self.ball_z_pos_lowest > env.target_ball_pos[2]: - self.ball_z_pos_lowest = env.target_ball_pos[2].copy() - # if not self.net_contact_detector: - # self.net_contact_detector = self.contact_detection(env, net_contact_obj) - if not self.table_contact_detector: - self.table_contact_detector = self.contact_detection(env, table_contact_obj) - if not self.floor_contact_detector: - self.floor_contact_detector = self.contact_detection(env, floor_contact_obj) - if not self.target_flag: - # Table Contact Reward. 
- if self.table_contact_detector: - self.total_reward += 1 - # only update when the first contact because of the flag - contact_coordinate = env.target_ball_pos[:2].copy() - print("contact table ball coordinate: ", env.target_ball_pos) - logging.info("contact table ball coordinate: {}".format(env.target_ball_pos)) - dist_target = np.linalg.norm(contact_coordinate - target_coordinate) - self.total_reward += (1 - np.tanh(dist_target)) * 2 - self.target_flag = True - # Net Contact Reward. Precondition: Table Contact exits. - if contact_coordinate[0] < 0: - print("left table contact") - logging.info("~~~~~~~~~~~~~~~left table contact~~~~~~~~~~~~~~~") - self.total_reward += 1 - # TODO Z coordinate reward - # self.total_reward += np.maximum(np.tanh(self.ball_z_pos_lowest), 0) - self.goal_achievement = True - else: - print("right table contact") - logging.info("~~~~~~~~~~~~~~~right table contact~~~~~~~~~~~~~~~") - self.total_reward += 0 - self.goal_achievement = False - # if self.net_contact_detector: - # self.total_reward += 0 - # self.goal_achievement = False - # else: - # self.total_reward += 1 - # self.goal_achievement = False - # Floor Contact Reward. Precondition: Table Contact exits. - elif self.floor_contact_detector: - self.total_reward += (1 - np.tanh(self.dist_target_virtual)) - self.target_flag = True - self.goal_achievement = False - # No Contact of Floor or Table, flying - else: - pass - # else: - # print("Flag is True already") - self.refresh_highest_reward() - - def distance_to_target(self): - pass - - @classmethod - def contact_detection(cls, env, goal_contact): - for i in range(env.sim.data.ncon): - contact = env.sim.data.contact[i] - achieved_geom1_name = env.sim.model.geom_id2name(contact.geom1) - achieved_geom2_name = env.sim.model.geom_id2name(contact.geom2) - if np.all([(achieved_geom1_name in goal_contact), (achieved_geom2_name in goal_contact)]): - print("contact of " + achieved_geom1_name + " " + achieved_geom2_name) - return True - else: - return False diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py deleted file mode 100644 index 6e6aa32..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2017 The dm_control Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -# """Soft indicator function evaluating whether a number is within bounds.""" -# -# from __future__ import absolute_import -# from __future__ import division -# from __future__ import print_function - -# Internal dependencies. -import numpy as np - -# The value returned by tolerance() at `margin` distance from `bounds` interval. -_DEFAULT_VALUE_AT_MARGIN = 0.1 - - -def _sigmoids(x, value_at_1, sigmoid): - """Returns 1 when `x` == 0, between 0 and 1 otherwise. - - Args: - x: A scalar or numpy array. - value_at_1: A float between 0 and 1 specifying the output when `x` == 1. 
- sigmoid: String, choice of sigmoid type. - - Returns: - A numpy array with values between 0.0 and 1.0. - - Raises: - ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and - `quadratic` sigmoids which allow `value_at_1` == 0. - ValueError: If `sigmoid` is of an unknown type. - """ - if sigmoid in ('cosine', 'linear', 'quadratic'): - if not 0 <= value_at_1 < 1: - raise ValueError('`value_at_1` must be nonnegative and smaller than 1, ' - 'got {}.'.format(value_at_1)) - else: - if not 0 < value_at_1 < 1: - raise ValueError('`value_at_1` must be strictly between 0 and 1, ' - 'got {}.'.format(value_at_1)) - - if sigmoid == 'gaussian': - scale = np.sqrt(-2 * np.log(value_at_1)) - return np.exp(-0.5 * (x*scale)**2) - - elif sigmoid == 'hyperbolic': - scale = np.arccosh(1/value_at_1) - return 1 / np.cosh(x*scale) - - elif sigmoid == 'long_tail': - scale = np.sqrt(1/value_at_1 - 1) - return 1 / ((x*scale)**2 + 1) - - elif sigmoid == 'cosine': - scale = np.arccos(2*value_at_1 - 1) / np.pi - scaled_x = x*scale - return np.where(abs(scaled_x) < 1, (1 + np.cos(np.pi*scaled_x))/2, 0.0) - - elif sigmoid == 'linear': - scale = 1-value_at_1 - scaled_x = x*scale - return np.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0) - - elif sigmoid == 'quadratic': - scale = np.sqrt(1-value_at_1) - scaled_x = x*scale - return np.where(abs(scaled_x) < 1, 1 - scaled_x**2, 0.0) - - elif sigmoid == 'tanh_squared': - scale = np.arctanh(np.sqrt(1-value_at_1)) - return 1 - np.tanh(x*scale)**2 - - else: - raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid)) - - -def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian', - value_at_margin=_DEFAULT_VALUE_AT_MARGIN): - """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise. - - Args: - x: A scalar or numpy array. - bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for - the target interval. These can be infinite if the interval is unbounded - at one or both ends, or they can be equal to one another if the target - value is exact. - margin: Float. Parameter that controls how steeply the output decreases as - `x` moves out-of-bounds. - * If `margin == 0` then the output will be 0 for all values of `x` - outside of `bounds`. - * If `margin > 0` then the output will decrease sigmoidally with - increasing distance from the nearest bound. - sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian', - 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'. - value_at_margin: A float between 0 and 1 specifying the output value when - the distance from `x` to the nearest bound is equal to `margin`. Ignored - if `margin == 0`. - - Returns: - A float or numpy array with values between 0.0 and 1.0. - - Raises: - ValueError: If `bounds[0] > bounds[1]`. - ValueError: If `margin` is negative. 
- """ - lower, upper = bounds - if lower > upper: - raise ValueError('Lower bound must be <= upper bound.') - if margin < 0: - raise ValueError('`margin` must be non-negative.') - - in_bounds = np.logical_and(lower <= x, x <= upper) - if margin == 0: - value = np.where(in_bounds, 1.0, 0.0) - else: - d = np.where(x < lower, lower - x, x - upper) / margin - value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, sigmoid)) - - return float(value) if np.isscalar(x) else value - - - - - diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py deleted file mode 100644 index fa308e3..0000000 --- a/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py +++ /dev/null @@ -1,49 +0,0 @@ -import json -import yaml -import xml.etree.ElementTree as ET -from collections import OrderedDict -from pathlib import Path - - -def read_json(fname): - fname = Path(fname) - with fname.open('rt') as handle: - return json.load(handle, object_hook=OrderedDict) - - -def write_json(content, fname): - fname = Path(fname) - with fname.open('wt') as handle: - json.dump(content, handle, indent=4, sort_keys=False) - - -def read_yaml(fname): - fname = Path(fname) - with fname.open('rt') as handle: - return yaml.load(handle, Loader=yaml.FullLoader) - - -def write_yaml(content, fname): - fname = Path(fname) - with fname.open('wt') as handle: - yaml.dump(content, handle) - - -def config_save(dir_path, config): - dir_path = Path(dir_path) - config_path_json = dir_path / "config.json" - config_path_yaml = dir_path / "config.yml" - # .json and .yml file,save 2 version of configuration. - write_json(config, config_path_json) - write_yaml(config, config_path_yaml) - - -def change_kp_in_xml(kp_list, - model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/simple_reacher/robotics/assets/table_tennis/right_arm_actuator.xml"): - tree = ET.parse(model_path) - root = tree.getroot() - # for actuator in root.find("actuator"): - for position, kp in zip(root.iter('position'), kp_list): - position.set("kp", str(kp)) - tree.write(model_path) - diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py deleted file mode 100644 index c5e6d2f..0000000 --- a/alr_envs/alr/mujoco/table_tennis/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .mp_wrapper import MPWrapper diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py deleted file mode 100644 index 40e4252..0000000 --- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py +++ /dev/null @@ -1,38 +0,0 @@ -from typing import Tuple, Union - -import numpy as np - -from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper - - -class MPWrapper(RawInterfaceWrapper): - - @property - def context_mask(self) -> np.ndarray: - # TODO: @Max Filter observations correctly - return np.hstack([ - [False] * 7, # Joint Pos - [True] * 2, # Ball pos - [True] * 2 # goal pos - ]) - - @property - def start_pos(self): - return self.self.init_qpos_tt - - @property - def current_pos(self) -> Union[float, int, np.ndarray, Tuple]: - return self.sim.data.qpos[:7].copy() - - @property - def current_vel(self) -> Union[float, int, np.ndarray, Tuple]: - return self.sim.data.qvel[:7].copy() - - @property - def goal_pos(self): - # TODO: @Max I think the default value of returning to the start is reasonable here - raise ValueError("Goal position is not available and has to be learnt based on the environment.") - - @property - def 
dt(self) -> Union[float, int]: - return self.env.dt diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py deleted file mode 100644 index 33db936..0000000 --- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py +++ /dev/null @@ -1,184 +0,0 @@ -import os - -import numpy as np -import mujoco_py -from gym import utils, spaces -from gym.envs.mujoco import MujocoEnv - -from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init -from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward - -#TODO: Check for simulation stability. Make sure the code runs even for sim crash - -MAX_EPISODE_STEPS = 1375 # (1.25 + 1.5) /0.002 -BALL_NAME_CONTACT = "target_ball_contact" -BALL_NAME = "target_ball" -TABLE_NAME = 'table_tennis_table' -FLOOR_NAME = 'floor' -PADDLE_CONTACT_1_NAME = 'bat' -PADDLE_CONTACT_2_NAME = 'bat_back' -RACKET_NAME = 'bat' -# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]]) -CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]]) -CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]]) - - -class TTEnvGym(MujocoEnv, utils.EzPickle): - - def __init__(self, ctxt_dim=2, fixed_goal=False): - model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml') - - self.ctxt_dim = ctxt_dim - self.fixed_goal = fixed_goal - if ctxt_dim == 2: - self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM - if self.fixed_goal: - self.goal = np.array([-1, -0.1, 0]) - else: - self.goal = np.zeros(3) # 2 x,y + 1z - elif ctxt_dim == 4: - self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM - self.goal = np.zeros(3) - else: - raise ValueError("either 2 or 4 dimensional Contexts available") - - # has no effect as it is overwritten in init of super - # action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]) - # action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]) - # self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64') - - self.time_steps = 0 - self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0]) - self.init_qvel_tt = np.zeros(10) - - self.reward_func = TT_Reward(self.ctxt_dim) - self.ball_landing_pos = None - self.hit_ball = False - self.ball_contact_after_hit = False - self._ids_set = False - super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1) - self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func. - self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT] - self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME] - self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME] - self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this - self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this - self.racket_id = self.sim.model._geom_name2id[RACKET_NAME] - - - def _set_ids(self): - self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func. 
- self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME] - self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME] - self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this - self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this - self.racket_id = self.sim.model._geom_name2id[RACKET_NAME] - self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT] - self._ids_set = True - - def _get_obs(self): - ball_pos = self.sim.data.body_xpos[self.ball_id][:2].copy() - goal_pos = self.goal[:2].copy() - obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions - ball_pos, - goal_pos]) - return obs - - def sample_context(self): - return self.np_random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim) - - def reset_model(self): - self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state - self.time_steps = 0 - self.ball_landing_pos = None - self.hit_ball = False - self.ball_contact_after_hit = False - if self.fixed_goal: - self.goal = self.goal[:2] - else: - self.goal = self.sample_context()[:2] - if self.ctxt_dim == 2: - initial_ball_state = ball_init(random=False) # fixed velocity, fixed position - elif self.ctxt_dim == 4: - initial_ball_state = ball_init(random=False)#raise NotImplementedError - - self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0]) - self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1]) - self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2]) - - self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3]) - self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4]) - self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5]) - - z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77])) - self.goal = z_extended_goal_pos - self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow - self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green - self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red - self.sim.forward() - - self.reward_func.reset(self.goal) # reset the reward function - self.n_step = 0 - return self._get_obs() - - def _contact_checker(self, id_1, id_2): - for coni in range(0, self.sim.data.ncon): - con = self.sim.data.contact[coni] - collision = con.geom1 == id_1 and con.geom2 == id_2 - collision_trans = con.geom1 == id_2 and con.geom2 == id_1 - if collision or collision_trans: - return True - return False - - def step(self, action): - if not self._ids_set: - self._set_ids() - done = False - episode_end = False if self.time_steps + 1 < MAX_EPISODE_STEPS else True - if not self.hit_ball: - self.hit_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side - if not self.hit_ball: - self.hit_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side - if self.hit_ball: - if not self.ball_contact_after_hit: - if self._contact_checker(self.ball_contact_id, self.floor_contact_id): # first check contact with floor - self.ball_contact_after_hit = True - self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id] - elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table - self.ball_contact_after_hit = True - self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id] - c_ball_pos = 
self.sim.data.body_xpos[self.ball_id] - racket_pos = self.sim.data.geom_xpos[self.racket_id] # TODO: use this to reach out the position of the paddle? - if self.ball_landing_pos is not None: - done = True - episode_end =True - reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hit_ball, self.ball_landing_pos) - self.time_steps += 1 - # gravity compensation on joints: - #action += self.sim.data.qfrc_bias[:7].copy() - try: - self.do_simulation(action, self.frame_skip) - except mujoco_py.MujocoException as e: - print('Simulation got unstable returning') - done = True - reward = -25 - ob = self._get_obs() - info = {"hit_ball": self.hit_ball, - "q_pos": np.copy(self.sim.data.qpos[:7]), - "ball_pos": np.copy(self.sim.data.qpos[7:])} - self.n_step += 1 - return ob, reward, done, info # might add some information here .... - - def set_context(self, context): - old_state = self.sim.get_state() - qpos = old_state.qpos.copy() - qvel = old_state.qvel.copy() - self.set_state(qpos, qvel) - self.goal = context - z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77])) - if self.ctxt_dim == 4: - z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77])) - self.goal = z_extended_goal_pos - self.sim.model.body_pos[5] = self.goal[:3] # TODO: Missing: Setting the desired incomoing landing position - self.sim.forward() - return self._get_obs() diff --git a/alr_envs/alr/mujoco/table_tennis/tt_reward.py b/alr_envs/alr/mujoco/table_tennis/tt_reward.py deleted file mode 100644 index 0e1bebf..0000000 --- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as np - - -class TT_Reward: - - def __init__(self, ctxt_dim): - self.ctxt_dim = ctxt_dim - self.c_goal = None # current desired landing point - self.c_ball_traj = [] - self.c_racket_traj = [] - self.constant = 8 - - def get_reward(self, episode_end, ball_position, racket_pos, hited_ball, ball_landing_pos): - self.c_ball_traj.append(ball_position.copy()) - self.c_racket_traj.append(racket_pos.copy()) - if not episode_end: - return 0 - else: - # # seems to work for episodic case - min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1)) - if not hited_ball: - return 0.2 * (1 - np.tanh(min_r_b_dist**2)) - else: - if ball_landing_pos is None: - min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1)) - return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2)) - else: - min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2]) - over_net_bonus = int(ball_landing_pos[0] < 0) - return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus - - - # if not hited_ball: - # min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1)) - # return -min_r_b_dist - # else: - # if ball_landing_pos is None: - # dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant - # else: - # dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant - # if dist_to_des_pos < -0.2: - # dist_to_des_pos = -0.2 - # return -dist_to_des_pos - - def reset(self, goal): - self.c_goal = goal.copy() - self.c_ball_traj = [] - self.c_racket_traj = [] \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/tt_utils.py b/alr_envs/alr/mujoco/table_tennis/tt_utils.py deleted file mode 100644 index 
04a1b97..0000000 --- a/alr_envs/alr/mujoco/table_tennis/tt_utils.py +++ /dev/null @@ -1,26 +0,0 @@ -import numpy as np - - -def ball_init(random=False, context_range=None): - if random: - dx = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers? - dy = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers? - dz = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers? - - v_x = np.random.uniform(1.7, 1.8) - v_y = np.random.uniform(0.7, 0.8) - v_z = np.random.uniform(0.1, 0.2) - else: - dx = 1 - dy = 0 - dz = 0.05 - - v_x = 2.5 - v_y = 2 - v_z = 0.5 - - initial_x = 0 + dx - 1.2 - initial_y = -0.2 + dy - 0.6 - initial_z = 0.3 + dz + 1.5 - initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z]) - return initial_ball_state diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml deleted file mode 100644 index ec94d96..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml deleted file mode 100644 index 7e04c28..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml +++ /dev/null @@ -1,103 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml deleted file mode 100644 index c313489..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml deleted file mode 100644 index 19543e2..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml deleted file mode 100644 index 25d0366..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml deleted file mode 100644 index e349992..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml deleted file mode 100644 index 9609a6e..0000000 --- a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py deleted file mode 100644 index 63432a7..0000000 --- a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py +++ 
/dev/null
@@ -1,23 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
-    @property
-    def context_mask(self):
-        return np.hstack([
-            [False] * 17,
-            [True]  # goal pos
-        ])
-
-    @property
-    def current_pos(self) -> Union[float, int, np.ndarray]:
-        return self.env.data.qpos[3:9].copy()
-
-    @property
-    def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
-        return self.env.data.qvel[3:9].copy()
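
For reference, the reward terms removed above (hierarchical_reward.py and tt_reward.py) are built from variations of the same bounded shaping term, weight * (1 - tanh(d)), where d is the shortest racket-ball or ball-target distance (sometimes squared): it pays close to the full weight when the tracked distance is near zero and decays smoothly towards zero for large distances. A minimal self-contained sketch of that term; the helper name and the printed sample values are illustrative and not part of the removed code:

import numpy as np

def shaped_distance_bonus(dist, weight=2.0):
    # Bounded shaping used throughout the removed table tennis rewards:
    # close to `weight` as dist -> 0, decaying smoothly towards 0 as dist grows.
    return weight * (1.0 - np.tanh(np.square(dist)))

# A racket-ball distance of 0.1 m keeps almost the full bonus (~1.98),
# while a distance of 1.0 m only earns ~0.48.
print(shaped_distance_bonus(0.1), shaped_distance_bonus(1.0))

The removed rewards.py is a port of dm_control's tolerance() soft indicator. Assuming that function is available as defined above, a short usage sketch of its margin behaviour (the sample inputs are made up):

# 1.0 inside the bounds, the default value_at_margin (0.1) exactly at `margin`
# distance from the bounds, and a hard 0/1 cutoff when margin == 0.
print(tolerance(0.0, bounds=(0.0, 0.0), margin=1.0, sigmoid='gaussian'))  # 1.0
print(tolerance(1.0, bounds=(0.0, 0.0), margin=1.0, sigmoid='gaussian'))  # 0.1
print(tolerance(2.0, bounds=(0.0, 0.0), margin=0.0))                      # 0.0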