diff --git a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py b/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
deleted file mode 100644
index a481d6f..0000000
--- a/alr_envs/alr/mujoco/ant_jump/mp_wrapper.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from typing import Union, Tuple
-
-import numpy as np
-
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- @property
- def context_mask(self):
- return np.hstack([
- [False] * 111, # ant has 111 dimensional observation space !!
- [True] # goal height
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.sim.data.qpos[7:15].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel[6:14].copy()
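Note: the context_mask above exposes only the goal height of AntJump's observation (111 state entries plus the sampled goal) as the context of a movement-primitive rollout. A minimal sketch of how such a boolean mask is typically applied; the array contents below are illustrative only:

    import numpy as np

    obs = np.random.randn(112)                  # full observation: 111 state dims + goal height
    mask = np.hstack([[False] * 111, [True]])   # same layout as context_mask above
    context = obs[mask]                         # -> 1-element array holding the goal height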
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py b/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/alr_reward_fct.py b/alr_envs/alr/mujoco/ball_in_a_cup/alr_reward_fct.py
deleted file mode 100644
index c96dd07..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/alr_reward_fct.py
+++ /dev/null
@@ -1,21 +0,0 @@
-class AlrReward:
- """
- A base class for non-Markovian reward functions which may need trajectory information to calculate an episodic
- reward. Call the methods in reset() and step() of the environment.
- """
-
- # methods to override:
- # ----------------------------
- def reset(self, *args, **kwargs):
- """
- Reset the reward function, empty state buffers before an episode, set contexts that influence reward, etc.
- """
- raise NotImplementedError
-
- def compute_reward(self, *args, **kwargs):
- """
-
- Returns: Useful things to return are reward values, success flags or crash flags
-
- """
- raise NotImplementedError
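Note: AlrReward defines an episodic (non-Markovian) reward protocol: the owning environment calls reset() once per episode and compute_reward() every step, and the subclass buffers whatever trajectory information it needs so it can pay the reward out when the episode ends. A minimal sketch of that calling convention, with a hypothetical subclass that is not part of this repository:

    import numpy as np

    class DistanceReward:  # hypothetical AlrReward subclass, for illustration only
        def reset(self, context):
            self.context = context
            self.dists = []                       # trajectory buffer, emptied each episode

        def compute_reward(self, pos, step, ep_length):
            self.dists.append(np.linalg.norm(pos - self.context))
            if step == ep_length - 1:             # episodic payout at the last step
                return float(np.exp(-2 * min(self.dists))), min(self.dists) < 0.05
            return 0.0, False                     # intermediate steps carry no reward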
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml b/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml
deleted file mode 100644
index 9229ad5..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/assets/biac_base.xml
+++ /dev/null
@@ -1,361 +0,0 @@
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
deleted file mode 100644
index 7b286bc..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ /dev/null
@@ -1,195 +0,0 @@
-from gym import utils
-import os
-import numpy as np
-from gym.envs.mujoco import MujocoEnv
-
-
-class ALRBallInACupEnv(MujocoEnv, utils.EzPickle):
- def __init__(
- self, n_substeps=4, apply_gravity_comp=True, simplified: bool = False,
- reward_type: str = None, context: np.ndarray = None
- ):
- utils.EzPickle.__init__(**locals())
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml")
-
- self._q_pos = []
- self._q_vel = []
- # self.weight_matrix_scale = 50
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = context
-
- alr_mujoco_env.AlrMujocoEnv.__init__(self, self.xml_path, apply_gravity_comp=apply_gravity_comp,
- n_substeps=n_substeps)
- self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
- self._start_vel = np.zeros(7)
-
- self.simplified = simplified
-
- self.sim_time = 8 # seconds
- self.sim_steps = int(self.sim_time / self.dt)
- if reward_type == "no_context":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
- reward_function = BallInACupReward
- elif reward_type == "contextual_goal":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
- reward_function = BallInACupReward
- else:
- raise ValueError("Unknown reward type: {}".format(reward_type))
- self.reward_function = reward_function(self.sim_steps)
-
- @property
- def start_pos(self):
- if self.simplified:
- return self._start_pos[1::2]
- else:
- return self._start_pos
-
- @property
- def start_vel(self):
- if self.simplified:
- return self._start_vel[1::2]
- else:
- return self._start_vel
-
- @property
- def current_pos(self):
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self):
- return self.sim.data.qvel[0:7].copy()
-
- def reset(self):
- self.reward_function.reset(None)
- return super().reset()
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self._start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
- self._q_pos = []
- self._q_vel = []
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
-
- self.set_state(start_pos, init_vel)
-
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
-
- crash = self.do_simulation(a)
- # joint_cons_viol = self.check_traj_in_joint_limits()
-
- self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
- self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
-
- ob = self._get_obs()
-
- if not crash:
- reward, success, is_collided = self.reward_function.compute_reward(a, self)
- done = success or self._steps == self.sim_steps - 1 or is_collided
- self._steps += 1
- else:
- reward = -2000
- success = False
- is_collided = False
- done = True
- return ob, reward, done, dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- velocity=angular_vel,
- # traj=self._q_pos,
- action=a,
- q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- is_success=success,
- is_collided=is_collided, sim_crash=crash)
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- # TODO: extend observation space
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
- # TODO
- @property
- def active_obs(self):
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- # These functions are for the task with 3 joint actuations
- def extend_des_pos(self, des_pos):
- des_pos_full = self._start_pos.copy()
- des_pos_full[1] = des_pos[0]
- des_pos_full[3] = des_pos[1]
- des_pos_full[5] = des_pos[2]
- return des_pos_full
-
- def extend_des_vel(self, des_vel):
- des_vel_full = self._start_vel.copy()
- des_vel_full[1] = des_vel[0]
- des_vel_full[3] = des_vel[1]
- des_vel_full[5] = des_vel[2]
- return des_vel_full
-
- def render(self, render_mode, **render_kwargs):
- if render_mode == "plot_trajectory":
- if self._steps == 1:
- import matplotlib.pyplot as plt
- # plt.ion()
- self.fig, self.axs = plt.subplots(3, 1)
-
- if self._steps <= 1750:
- for ax, cp in zip(self.axs, self.current_pos[1::2]):
- ax.scatter(self._steps, cp, s=2, marker=".")
-
- # self.fig.show()
-
- else:
- super().render(render_mode, **render_kwargs)
-
-
-if __name__ == "__main__":
- env = ALRBallInACupEnv()
- ctxt = np.array([-0.20869846, -0.66376693, 1.18088501])
-
- env.configure(ctxt)
- env.reset()
- # env.render()
- for i in range(16000):
- # test with random actions
- ac = 0.001 * env.action_space.sample()[0:7]
- # ac = env.start_pos
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- # env.render()
-
- print(rew)
-
- if d:
- break
-
- env.close()
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
deleted file mode 100644
index 81a08f5..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_mp_wrapper.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class BallInACupMPWrapper(RawInterfaceWrapper):
-
- @property
- def context_mask(self) -> np.ndarray:
- # TODO: @Max Filter observations correctly
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- # [True] * 2, # x-y coordinates of target distance
- [False] # env steps
- ])
-
- @property
- def start_pos(self):
- if self.simplified:
- return self._start_pos[1::2]
- else:
- return self._start_pos
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.sim.data.qvel[0:7].copy()
-
- @property
- def goal_pos(self):
- # TODO: @Max I think the default value of returning to the start is reasonable here
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
deleted file mode 100644
index b4d0e6e..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco.ball_in_a_cup import alr_reward_fct
-
-
-class BallInACupReward(alr_reward_fct.AlrReward):
- def __init__(self, sim_time):
- self.sim_time = sim_time
-
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
-
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
-
- self.ball_traj = None
- self.dists = None
- self.dists_ctxt = None
- self.dists_final = None
- self.costs = None
-
- self.reset(None)
-
- def reset(self, context):
- self.ball_traj = np.zeros(shape=(self.sim_time, 3))
- self.cup_traj = np.zeros(shape=(self.sim_time, 3))
- self.dists = []
- self.dists_ctxt = []
- self.dists_final = []
- self.costs = []
- self.context = context
- self.ball_in_cup = False
- self.ball_above_threshold = False
- self.dist_ctxt = 3
- self.action_costs = []
- self.cup_angles = []
-
- def compute_reward(self, action, sim, step):
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- stop_sim = False
- success = False
-
- self.ball_id = sim.model._body_name2id["ball"]
- self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
- self.goal_id = sim.model._site_name2id["cup_goal"]
- self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
- self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
-
- if self.check_collision(sim):
- reward = - 1e-3 * action_cost - 1000
- stop_sim = True
- return reward, success, stop_sim
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.dists_ctxt.append(np.linalg.norm(ball_pos - self.context))
- self.ball_traj[step, :] = np.copy(ball_pos)
- self.cup_traj[step, :] = np.copy(goal_pos) # ?
- cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
- self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
- 1 - 2 * (cup_quat[1] ** 2 + cup_quat[2] ** 2)))
-
- # Determine the first time when ball is in cup
- if not self.ball_in_cup:
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
- self.ball_in_cup = ball_in_cup
- if ball_in_cup:
- dist_to_ctxt = np.linalg.norm(ball_pos - self.context)
- self.dist_ctxt = dist_to_ctxt
-
- if step == self.sim_time - 1:
- t_min_dist = np.argmin(self.dists)
- angle_min_dist = self.cup_angles[t_min_dist]
- cost_angle = (angle_min_dist - np.pi / 2) ** 2
-
- min_dist = np.min(self.dists)
- dist_final = self.dists_final[-1]
- # dist_ctxt = self.dists_ctxt[-1]
-
- # # max distance between ball and cup and cup height at that time
- # ball_to_cup_diff = self.ball_traj[:, 2] - self.cup_traj[:, 2]
- # t_max_diff = np.argmax(ball_to_cup_diff)
- # t_max_ball_height = np.argmax(self.ball_traj[:, 2])
- # max_ball_height = np.max(self.ball_traj[:, 2])
-
- # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
- cost = 0.5 * min_dist + 0.5 * dist_final + 0.3 * np.minimum(self.dist_ctxt, 3) + 0.01 * cost_angle
- reward = np.exp(-2 * cost) - 1e-3 * action_cost
- # if max_ball_height < self.context[2] or ball_to_cup_diff[t_max_ball_height] < 0:
- # reward -= 1
-
- success = dist_final < 0.05 and self.dist_ctxt < 0.05
- else:
- reward = - 1e-3 * action_cost
- success = False
-
- return reward, success, stop_sim
-
- def _get_stage_wise_cost(self, ball_in_cup, min_dist, dist_final, dist_to_ctxt):
- if not ball_in_cup:
- cost = 3 + 2*(0.5 * min_dist**2 + 0.5 * dist_final**2)
- else:
- cost = 2 * dist_to_ctxt ** 2
- print('Context Distance:', dist_to_ctxt)
- return cost
-
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_collision(self, sim):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
- if collision or collision_trans:
- return True
- return False
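Note: both BallInACupReward variants recover the cup's tilt from its body quaternion with the same arctan2 expression before penalising deviation from an upright pose. A standalone sketch of that conversion, assuming MuJoCo's (w, x, y, z) quaternion ordering; the helper name is illustrative:

    import numpy as np

    def cup_tilt_from_quat(quat):
        # rotation angle about the x-axis (roll) of a [w, x, y, z] quaternion
        w, x, y, z = quat
        return np.arctan2(2 * (w * x + y * z), 1 - 2 * (x ** 2 + y ** 2))

    # cost term used above: squared deviation from an upright cup at pi/2
    cost_angle = (cup_tilt_from_quat([1.0, 0.0, 0.0, 0.0]) - np.pi / 2) ** 2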
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
deleted file mode 100644
index 8044eb8..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import numpy as np
-from alr_envs.alr.mujoco.ball_in_a_cup import alr_reward_fct
-
-
-class BallInACupReward(alr_reward_fct.AlrReward):
- def __init__(self, env):
- self.env = env
- self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
- "wrist_palm_link_convex_geom",
- "wrist_pitch_link_convex_decomposition_p1_geom",
- "wrist_pitch_link_convex_decomposition_p2_geom",
- "wrist_pitch_link_convex_decomposition_p3_geom",
- "wrist_yaw_link_convex_decomposition_p1_geom",
- "wrist_yaw_link_convex_decomposition_p2_geom",
- "forearm_link_convex_decomposition_p1_geom",
- "forearm_link_convex_decomposition_p2_geom"]
-
- self.ball_id = None
- self.ball_collision_id = None
- self.goal_id = None
- self.goal_final_id = None
- self.collision_ids = None
- self._is_collided = False
- self.collision_penalty = 1000
-
- self.ball_traj = None
- self.dists = None
- self.dists_final = None
- self.costs = None
-
- self.reset(None)
-
- def reset(self, context):
- # self.sim_time = self.env.sim.dtsim_time
- self.ball_traj = [] # np.zeros(shape=(self.sim_time, 3))
- self.dists = []
- self.dists_final = []
- self.costs = []
- self.action_costs = []
- self.angle_costs = []
- self.cup_angles = []
-
- def compute_reward(self, action):
- self.ball_id = self.env.sim.model._body_name2id["ball"]
- self.ball_collision_id = self.env.sim.model._geom_name2id["ball_geom"]
- self.goal_id = self.env.sim.model._site_name2id["cup_goal"]
- self.goal_final_id = self.env.sim.model._site_name2id["cup_goal_final"]
- self.collision_ids = [self.env.sim.model._geom_name2id[name] for name in self.collision_objects]
-
- ball_in_cup = self.check_ball_in_cup(self.env.sim, self.ball_collision_id)
-
- # Compute the current distance from the ball to the inner part of the cup
- goal_pos = self.env.sim.data.site_xpos[self.goal_id]
- ball_pos = self.env.sim.data.body_xpos[self.ball_id]
- goal_final_pos = self.env.sim.data.site_xpos[self.goal_final_id]
- self.dists.append(np.linalg.norm(goal_pos - ball_pos))
- self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- # self.ball_traj[self.env._steps, :] = ball_pos
- self.ball_traj.append(ball_pos)
- cup_quat = np.copy(self.env.sim.data.body_xquat[self.env.sim.model._body_name2id["cup"]])
- cup_angle = np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
- 1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2))
- cost_angle = (cup_angle - np.pi / 2) ** 2
- self.angle_costs.append(cost_angle)
- self.cup_angles.append(cup_angle)
-
- action_cost = np.sum(np.square(action))
- self.action_costs.append(action_cost)
-
- self._is_collided = self.check_collision(self.env.sim) or self.env._check_traj_in_joint_limits()
-
- if self.env._steps == self.env.ep_length - 1 or self._is_collided:
- t_min_dist = np.argmin(self.dists)
- angle_min_dist = self.cup_angles[t_min_dist]
- # cost_angle = (angle_min_dist - np.pi / 2)**2
-
-
- # min_dist = self.dists[t_min_dist]
- dist_final = self.dists_final[-1]
- min_dist_final = np.min(self.dists_final)
-
- # cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
- # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
- # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
- reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-3 * action_cost - self.collision_penalty * int(self._is_collided)
- success = dist_final < 0.05 and ball_in_cup and not self._is_collided
- crash = self._is_collided
- else:
- reward = - 1e-3 * action_cost - 1e-4 * cost_angle # TODO: increase action_cost weight
- success = False
- crash = False
-
- return reward, success, crash
-
- def check_ball_in_cup(self, sim, ball_collision_id):
- cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
- collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
-
- if collision or collision_trans:
- return True
- return False
-
- def check_collision(self, sim):
- for coni in range(0, sim.data.ncon):
- con = sim.data.contact[coni]
-
- collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
- collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
-
- if collision or collision_trans:
- return True
- return False
diff --git a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py b/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py
deleted file mode 100644
index 4abfb6c..0000000
--- a/alr_envs/alr/mujoco/ball_in_a_cup/biac_pd.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import os
-
-import gym.envs.mujoco
-import gym.envs.mujoco as mujoco_env
-import mujoco_py.builder
-import numpy as np
-from gym import utils
-
-
-class ALRBallInACupPDEnv(mujoco_env.MujocoEnv, utils.EzPickle):
- def __init__(self, frame_skip=4, apply_gravity_comp=True, simplified: bool = False,
- reward_type: str = None, context: np.ndarray = None):
- utils.EzPickle.__init__(**locals())
- self._steps = 0
-
- self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "biac_base.xml")
-
- self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
-
- self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
- self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
-
- self.context = context
- self.apply_gravity_comp = apply_gravity_comp
- self.simplified = simplified
-
- self._start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
- self._start_vel = np.zeros(7)
-
- self.sim_time = 8 # seconds
- self._dt = 0.02
- self.ep_length = 4000 # based on 8 seconds with dt = 0.02 int(self.sim_time / self.dt)
- if reward_type == "no_context":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
- reward_function = BallInACupReward
- elif reward_type == "contextual_goal":
- from alr_envs.alr.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
- reward_function = BallInACupReward
- else:
- raise ValueError("Unknown reward type: {}".format(reward_type))
- self.reward_function = reward_function(self)
-
- mujoco_env.MujocoEnv.__init__(self, self.xml_path, frame_skip)
-
- @property
- def dt(self):
- return self._dt
-
- # TODO: @Max is this even needed?
- @property
- def start_vel(self):
- if self.simplified:
- return self._start_vel[1::2]
- else:
- return self._start_vel
-
- # def _set_action_space(self):
- # if self.simplified:
- # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)[1::2]
- # else:
- # bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
- # low, high = bounds.T
- # self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
- # return self.action_space
-
- def reset(self):
- self.reward_function.reset(None)
- return super().reset()
-
- def reset_model(self):
- init_pos_all = self.init_qpos.copy()
- init_pos_robot = self._start_pos
- init_vel = np.zeros_like(init_pos_all)
-
- self._steps = 0
- self._q_pos = []
- self._q_vel = []
-
- start_pos = init_pos_all
- start_pos[0:7] = init_pos_robot
-
- self.set_state(start_pos, init_vel)
-
- return self._get_obs()
-
- def step(self, a):
- reward_dist = 0.0
- angular_vel = 0.0
- reward_ctrl = - np.square(a).sum()
-
- # if self.simplified:
- # tmp = np.zeros(7)
- # tmp[1::2] = a
- # a = tmp
-
- if self.apply_gravity_comp:
- a += self.sim.data.qfrc_bias[:len(a)].copy() / self.model.actuator_gear[:, 0]
-
- crash = False
- try:
- self.do_simulation(a, self.frame_skip)
- except mujoco_py.builder.MujocoException:
- crash = True
- # joint_cons_viol = self.check_traj_in_joint_limits()
-
- ob = self._get_obs()
-
- if not crash:
- reward, success, is_collided = self.reward_function.compute_reward(a)
- done = success or is_collided # self._steps == self.sim_steps - 1
- self._steps += 1
- else:
- reward = -2000
- success = False
- is_collided = False
- done = True
-
- return ob, reward, done, dict(reward_dist=reward_dist,
- reward_ctrl=reward_ctrl,
- velocity=angular_vel,
- # traj=self._q_pos,
- action=a,
- q_pos=self.sim.data.qpos[0:7].ravel().copy(),
- q_vel=self.sim.data.qvel[0:7].ravel().copy(),
- is_success=success,
- is_collided=is_collided, sim_crash=crash)
-
- def check_traj_in_joint_limits(self):
- return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
-
- # TODO: extend observation space
- def _get_obs(self):
- theta = self.sim.data.qpos.flat[:7]
- return np.concatenate([
- np.cos(theta),
- np.sin(theta),
- # self.get_body_com("target"), # only return target to make problem harder
- [self._steps],
- ])
-
- # These functions are for the task with 3 joint actuations
- def extend_des_pos(self, des_pos):
- des_pos_full = self._start_pos.copy()
- des_pos_full[1] = des_pos[0]
- des_pos_full[3] = des_pos[1]
- des_pos_full[5] = des_pos[2]
- return des_pos_full
-
- def extend_des_vel(self, des_vel):
- des_vel_full = self._start_vel.copy()
- des_vel_full[1] = des_vel[0]
- des_vel_full[3] = des_vel[1]
- des_vel_full[5] = des_vel[2]
- return des_vel_full
-
- def render(self, render_mode, **render_kwargs):
- if render_mode == "plot_trajectory":
- if self._steps == 1:
- import matplotlib.pyplot as plt
- # plt.ion()
- self.fig, self.axs = plt.subplots(3, 1)
-
- if self._steps <= 1750:
- for ax, cp in zip(self.axs, self.current_pos[1::2]):
- ax.scatter(self._steps, cp, s=2, marker=".")
-
- # self.fig.show()
-
- else:
- super().render(render_mode, **render_kwargs)
-
-
-if __name__ == "__main__":
- env = ALRBallInACupPDEnv(reward_type="no_context", simplified=True)
- # env = gym.make("alr_envs:ALRBallInACupPDSimpleDetPMP-v0")
- # ctxt = np.array([-0.20869846, -0.66376693, 1.18088501])
-
- # env.configure(ctxt)
- env.reset()
- env.render("human")
- for i in range(16000):
- # test with random actions
- ac = 0.02 * env.action_space.sample()[0:7]
- # ac = env.start_pos
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- env.render("human")
-
- print(rew)
-
- if d:
- break
-
- env.close()
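Note: ALRBallInACupPDEnv compensates gravity before stepping by adding the MuJoCo bias forces, rescaled by the actuator gear, to the commanded torques. A reduced sketch of just that step, using the same mujoco_py fields the deleted file relies on:

    def gravity_compensated(sim, model, a):
        # qfrc_bias holds gravity and Coriolis/centrifugal forces; dividing by the
        # actuator gear converts them into actuator units before adding to the command
        return a + sim.data.qfrc_bias[:len(a)].copy() / model.actuator_gear[:len(a), 0]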
diff --git a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py b/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
deleted file mode 100644
index e69d4f9..0000000
--- a/alr_envs/alr/mujoco/beerpong/mp_wrapper.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from typing import Union, Tuple
-
-import numpy as np
-
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- def get_context_mask(self):
- return np.hstack([
- [False] * 7, # cos
- [False] * 7, # sin
- [False] * 7, # joint velocities
- [False] * 3, # cup_goal_diff_final
- [False] * 3, # cup_goal_diff_top
- [True] * 2, # xy position of cup
- [False] # env steps
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qpos[0:7].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel[0:7].copy()
-
- # TODO: Fix this
- def _episode_callback(self, action: np.ndarray, mp) -> Tuple[np.ndarray, Union[np.ndarray, None]]:
- if mp.learn_tau:
- self.env.env.release_step = action[0] / self.env.dt # Tau value
- return action, None
- else:
- return action, None
-
- def set_context(self, context):
- xyz = np.zeros(3)
- xyz[:2] = context
- xyz[-1] = 0.840
- self.env.env.model.body_pos[self.env.env.cup_table_id] = xyz
- return self.get_observation_from_step(self.env.env._get_obs())
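Note: when the movement primitive learns its duration (learn_tau), the Beerpong wrapper translates the first action entry, a time in seconds, into the simulation step at which the ball is released. A small sketch of that conversion; the deleted wrapper stores the raw quotient, whether to round to an integer step index is left open here:

    def tau_to_release_step(tau, dt):
        # a duration in seconds maps onto the control-step index via the step length dt
        return tau / dt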
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml
deleted file mode 100644
index 7772d14..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_7_motor_actuator.xml
+++ /dev/null
@@ -1,12 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml
deleted file mode 100644
index a8df915..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_left.xml
+++ /dev/null
@@ -1,76 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml
deleted file mode 100644
index 011b95a..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_barrett_wam_7dof_right.xml
+++ /dev/null
@@ -1,95 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml
deleted file mode 100644
index ad1ae35..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_table.xml
+++ /dev/null
@@ -1,38 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml
deleted file mode 100644
index eb2b347..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_target_ball.xml
+++ /dev/null
@@ -1,10 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml
deleted file mode 100644
index 29a21e1..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/include_test_balls.xml
+++ /dev/null
@@ -1,80 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl
deleted file mode 100644
index 133b112..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl
deleted file mode 100644
index 047e9df..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/base_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl
deleted file mode 100644
index 5ff94a2..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_dist_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl
deleted file mode 100644
index c548448..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl
deleted file mode 100644
index 495160d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_med_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl
deleted file mode 100644
index b4bb322..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl
deleted file mode 100644
index f05174e..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl
deleted file mode 100644
index eb252d9..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_finger_prox_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl
deleted file mode 100644
index c039f0d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 250acaf..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl
deleted file mode 100644
index 993d0f7..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl
deleted file mode 100644
index 8448a3f..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/bhand_palm_link_convex_decomposition_p4.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl
deleted file mode 100644
index b34963d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/elbow_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl
deleted file mode 100644
index e6aa6b6..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 667902e..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl
deleted file mode 100644
index ed66bbb..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/forearm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl
deleted file mode 100644
index aba957d..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl
deleted file mode 100644
index 5cca6a9..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl
deleted file mode 100644
index 3343e27..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl
deleted file mode 100644
index ae505fd..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl
deleted file mode 100644
index c36cfec..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl
deleted file mode 100644
index dc633c4..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/shoulder_pitch_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl
deleted file mode 100644
index 82d0093..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl
deleted file mode 100644
index 7fd5a55..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl
deleted file mode 100644
index 76353ae..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/upper_arm_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl
deleted file mode 100644
index a0386f6..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_palm_link_convex.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl
deleted file mode 100644
index c36f88f..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl
deleted file mode 100644
index d00cac1..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl
deleted file mode 100644
index 34d1d8b..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_convex_decomposition_p3.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl
deleted file mode 100644
index 13d2f73..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_pitch_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl
deleted file mode 100644
index 06e857f..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p1.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl
deleted file mode 100644
index 48e1bb1..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_convex_decomposition_p2.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl
deleted file mode 100644
index 0d95239..0000000
Binary files a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/meshes/wrist_yaw_link_fine.stl and /dev/null differ
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml
deleted file mode 100644
index 9abf102..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/right_arm_actuator.xml
+++ /dev/null
@@ -1,19 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml
deleted file mode 100644
index dfbc37a..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/shared.xml
+++ /dev/null
@@ -1,49 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml b/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml
deleted file mode 100644
index f2432bb..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/assets/table_tennis_env.xml
+++ /dev/null
@@ -1,41 +0,0 @@
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py b/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py
deleted file mode 100644
index 9b16ae1..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/envs/table_tennis_env.py
+++ /dev/null
@@ -1,244 +0,0 @@
-import numpy as np
-from gym import spaces
-from gym.envs.robotics import robot_env, utils
-# import xml.etree.ElementTree as ET
-from alr_envs.alr.mujoco.gym_table_tennis.utils.rewards.hierarchical_reward import HierarchicalRewardTableTennis
-import glfw
-from alr_envs.alr.mujoco.gym_table_tennis.utils.experiment import ball_initialize
-from pathlib import Path
-import os
-
-
-class TableTennisEnv(robot_env.RobotEnv):
- """Class for Table Tennis environment.
- """
- def __init__(self, n_substeps=1,
- model_path=None,
- initial_qpos=None,
- initial_ball_state=None,
- config=None,
- reward_obj=None
- ):
- """Initializes a new mujoco based Table Tennis environment.
-
- Args:
- model_path (string): path to the environments XML file
- initial_qpos (dict): a dictionary of joint names and values that define the initial
- n_actions: Number of joints
- n_substeps (int): number of substeps the simulation runs on every call to step
- scale (double): limit maximum change in position
- initial_ball_state: to reset the ball state
- """
- # self.config = config.config
- if model_path is None:
- path_cws = Path.cwd()
- print(path_cws)
- current_dir = Path(os.path.split(os.path.realpath(__file__))[0])
- table_tennis_env_xml_path = current_dir / "assets"/"table_tennis_env.xml"
- model_path = str(table_tennis_env_xml_path)
- self.config = config
- action_space = True # self.config['trajectory']['args']['action_space']
- time_step = 0.002 # self.config['mujoco_sim_env']['args']["time_step"]
- if initial_qpos is None:
- initial_qpos = {"wam/base_yaw_joint_right": 1.5,
- "wam/shoulder_pitch_joint_right": 1,
- "wam/shoulder_yaw_joint_right": 0,
- "wam/elbow_pitch_joint_right": 1,
- "wam/wrist_yaw_joint_right": 1,
- "wam/wrist_pitch_joint_right": 0,
- "wam/palm_yaw_joint_right": 0}
- # initial_qpos = [1.5, 1, 0, 1, 1, 0, 0] # self.config['robot_config']['args']['initial_qpos']
-
- # TODO should read all configuration in config
- assert initial_qpos is not None, "Must initialize the initial q position of robot arm"
- n_actions = 7
- self.initial_qpos_value = np.array(list(initial_qpos.values())).copy()
- # self.initial_qpos_value = np.array(initial_qpos)
- # # change time step in .xml file
- # tree = ET.parse(model_path)
- # root = tree.getroot()
- # for option in root.findall('option'):
- # option.set("timestep", str(time_step))
- #
- # tree.write(model_path)
-
- super(TableTennisEnv, self).__init__(
- model_path=model_path, n_substeps=n_substeps, n_actions=n_actions,
- initial_qpos=initial_qpos)
-
- if action_space:
- self.action_space = spaces.Box(low=np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2]),
- high=np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2]),
- dtype='float64')
- else:
- self.action_space = spaces.Box(low=np.array([-np.inf] * 7),
- high=np.array([-np.inf] * 7),
- dtype='float64')
- self.scale = None
- self.desired_pos = None
- self.n_actions = n_actions
- self.action = None
- self.time_step = time_step
- self._dt = time_step
- self.paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
- if reward_obj is None:
- self.reward_obj = HierarchicalRewardTableTennis()
- else:
- self.reward_obj = reward_obj
-
- if initial_ball_state is not None:
- self.initial_ball_state = initial_ball_state
- else:
- self.initial_ball_state = ball_initialize(random=False)
- self.target_ball_pos = self.sim.data.get_site_xpos("target_ball")
- self.racket_center_pos = self.sim.data.get_site_xpos("wam/paddle_center")
-
- def close(self):
- if self.viewer is not None:
- glfw.destroy_window(self.viewer.window)
- # self.viewer.window.close()
- self.viewer = None
- self._viewers = {}
-
- # GoalEnv methods
- # ----------------------------
- def compute_reward(self, achieved_goal, goal, info):
- # reset the reward, if action valid
- # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
- # right_court_contact_detector = self.reward_obj.contact_detection(self, right_court_contact_obj)
- # if right_court_contact_detector:
- # print("can detect the table ball contact")
- self.reward_obj.total_reward = 0
- # Stage 1 Hitting
- self.reward_obj.hitting(self)
- # if not hitted, return the highest reward
- if not self.reward_obj.goal_achievement:
- # return self.reward_obj.highest_reward
- return self.reward_obj.total_reward
- # # Stage 2 Right Table Contact
- # self.reward_obj.right_table_contact(self)
- # if not self.reward_obj.goal_achievement:
- # return self.reward_obj.highest_reward
- # # Stage 2 Net Contact
- # self.reward_obj.net_contact(self)
- # if not self.reward_obj.goal_achievement:
- # return self.reward_obj.highest_reward
- # Stage 3 Opponent court Contact
- # self.reward_obj.landing_on_opponent_court(self)
- # if not self.reward_obj.goal_achievement:
- # print("self.reward_obj.highest_reward: ", self.reward_obj.highest_reward)
- # TODO
- self.reward_obj.target_achievement(self)
- # return self.reward_obj.highest_reward
- return self.reward_obj.total_reward
-
- def _reset_sim(self):
- self.sim.set_state(self.initial_state)
- [initial_x, initial_y, initial_z, v_x, v_y, v_z] = self.initial_ball_state
- self.sim.data.set_joint_qpos('tar:x', initial_x)
- self.sim.data.set_joint_qpos('tar:y', initial_y)
- self.sim.data.set_joint_qpos('tar:z', initial_z)
- self.energy_corrected = True
- self.give_reflection_reward = False
-
- # velocity is positive direction
- self.sim.data.set_joint_qvel('tar:x', v_x)
- self.sim.data.set_joint_qvel('tar:y', v_y)
- self.sim.data.set_joint_qvel('tar:z', v_z)
-
- # Apply gravity compensation
- if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]:
- self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
- self.sim.forward()
- return True
-
- def _env_setup(self, initial_qpos):
- for name, value in initial_qpos.items():
- self.sim.data.set_joint_qpos(name, value)
-
- # Apply gravity compensation
- if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]:
- self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
- self.sim.forward()
-
- # Get the target position
- self.initial_paddle_center_xpos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
- self.initial_paddle_center_vel = None # self.sim.get_site_
-
- def _sample_goal(self):
- goal = self.initial_paddle_center_xpos[:3] + self.np_random.uniform(-0.2, 0.2, size=3)
- return goal.copy()
-
- def _get_obs(self):
-
- # positions of racket center
- paddle_center_pos = self.sim.data.get_site_xpos('wam/paddle_center')
- ball_pos = self.sim.data.get_site_xpos("target_ball")
-
- dt = self.sim.nsubsteps * self.sim.model.opt.timestep
- paddle_center_velp = self.sim.data.get_site_xvelp('wam/paddle_center') * dt
- robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
-
- wrist_state = robot_qpos[-3:]
- wrist_vel = robot_qvel[-3:] * dt # change to a scalar if the gripper is made symmetric
-
- # achieved_goal = paddle_body_EE_pos
- obs = np.concatenate([
- paddle_center_pos, paddle_center_velp, wrist_state, wrist_vel
- ])
-
- out_dict = {
- 'observation': obs.copy(),
- 'achieved_goal': paddle_center_pos.copy(),
- 'desired_goal': self.goal.copy(),
- 'q_pos': self.sim.data.qpos[:].copy(),
- "ball_pos": ball_pos.copy(),
- # "hitting_flag": self.reward_obj.hitting_flag
- }
-
- return out_dict
-
- def _step_callback(self):
- pass
-
- def _set_action(self, action):
- # Apply gravity compensation
- if self.sim.data.qfrc_applied[:7] is not self.sim.data.qfrc_bias[:7]:
- self.sim.data.qfrc_applied[:7] = self.sim.data.qfrc_bias[:7]
- # print("set action process running")
- assert action.shape == (self.n_actions,)
- self.action = action.copy() # ensure that we don't change the action outside of this scope
- pos_ctrl = self.action[:] # limit maximum change in position
- pos_ctrl = np.clip(pos_ctrl, self.action_space.low, self.action_space.high)
-
- # get desired trajectory
- self.sim.data.qpos[:7] = pos_ctrl
- self.sim.forward()
- self.desired_pos = self.sim.data.get_site_xpos('wam/paddle_center').copy()
-
- self.sim.data.ctrl[:] = pos_ctrl
-
- def _is_success(self, achieved_goal, desired_goal):
- pass
-
-
-if __name__ == '__main__':
- render_mode = "human" # "human" or "partial" or "final"
- env = TableTennisEnv()
- env.reset()
- # env.render(mode=render_mode)
-
- for i in range(500):
- # objective.load_result("/tmp/cma")
- # test with random actions
- ac = env.action_space.sample()
- # ac[0] += np.pi/2
- obs, rew, d, info = env.step(ac)
- env.render(mode=render_mode)
-
- print(rew)
-
- if d:
- break
-
- env.close()
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py
deleted file mode 100644
index a106d68..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/experiment.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import numpy as np
-from gym.utils import seeding
-from alr_envs.alr.mujoco.gym_table_tennis.utils.util import read_yaml, read_json
-from pathlib import Path
-
-
-def ball_initialize(random=False, scale=False, context_range=None, scale_value=None):
- if random:
- if scale:
- # if scale_value is None:
- scale_value = context_scale_initialize(context_range)
- v_x, v_y, v_z = [2.5, 2, 0.5] * scale_value
- dx = 1
- dy = 0
- dz = 0.05
- else:
- seed = None
- np_random, seed = seeding.np_random(seed)
- dx = np_random.uniform(-0.1, 0.1)
- dy = np_random.uniform(-0.1, 0.1)
- dz = np_random.uniform(-0.1, 0.1)
-
- v_x = np_random.uniform(1.7, 1.8)
- v_y = np_random.uniform(0.7, 0.8)
- v_z = np_random.uniform(0.1, 0.2)
- # print(dx, dy, dz, v_x, v_y, v_z)
- # else:
- # dx = -0.1
- # dy = 0.05
- # dz = 0.05
- # v_x = 1.5
- # v_y = 0.7
- # v_z = 0.06
- # initial_x = -0.6 + dx
- # initial_y = -0.3 + dy
- # initial_z = 0.8 + dz
- else:
- if scale:
- v_x, v_y, v_z = [2.5, 2, 0.5] * scale_value
- else:
- v_x = 2.5
- v_y = 2
- v_z = 0.5
- dx = 1
- dy = 0
- dz = 0.05
-
- initial_x = 0 + dx
- initial_y = -0.2 + dy
- initial_z = 0.3 + dz
- # print("initial ball state: ", initial_x, initial_y, initial_z, v_x, v_y, v_z)
- initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
- return initial_ball_state
-
-
-def context_scale_initialize(range):
- """
-
- Returns:
-
- """
- low, high = range
- scale = np.random.uniform(low, high, 1)
- return scale
-
-
-def config_handle_generation(config_file_path):
- """Generate config handle for multiprocessing
-
- Args:
- config_file_path:
-
- Returns:
-
- """
- cfg_fname = Path(config_file_path)
- # .json and .yml file
- if cfg_fname.suffix == ".json":
- config = read_json(cfg_fname)
- elif cfg_fname.suffix == ".yml":
- config = read_yaml(cfg_fname)
-
- return config
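Note: ball_initialize scales a fixed base velocity by the context factor drawn in context_scale_initialize; because that factor is a one-element NumPy array, multiplying the plain Python list broadcasts elementwise. A two-line check of that behaviour with illustrative bounds:

    import numpy as np

    scale = np.random.uniform(0.8, 1.2, 1)   # as returned by context_scale_initialize
    v_x, v_y, v_z = [2.5, 2, 0.5] * scale    # list * ndarray broadcasts to a 3-vector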
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
deleted file mode 100644
index fe69104..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/hierarchical_reward.py
+++ /dev/null
@@ -1,402 +0,0 @@
-import numpy as np
-import logging
-
-
-class HierarchicalRewardTableTennis(object):
- """Class for hierarchical reward function for table tennis experiment.
-
- Return Highest Reward.
- Reward = 0
- Step 1: Action Valid. Upper Bound 0
- [-∞, 0]
- Reward += -1 * |hit_duration - hit_duration_threshold| * |hit_duration < hit_duration_threshold| * 10
- Step 2: Hitting. Upper Bound 2
- if hitting:
- [0, 2]
- Reward = 2 * (1 - tanh(|shortest_hitting_dist|))
- if not hitting:
- [0, 0.2]
- Reward = 2 * (1 - tanh(|shortest_hitting_dist|))
- Step 3: Target Point Achievement. Upper Bound 6
- [0, 4]
- if table_contact_detector:
- Reward += 1
- Reward += (1 - tanh(|shortest_hitting_dist|)) * 2
- if contact_coordinate[0] < 0:
- Reward += 1
- else:
- Reward += 0
- elif:
- Reward += (1 - tanh(|shortest_hitting_dist|))
- """
-
- def __init__(self):
- self.reward = None
- self.goal_achievement = False
- self.total_reward = 0
- self.shortest_hitting_dist = 1000
- self.highest_reward = -1000
- self.lowest_corner_dist = 100
- self.right_court_contact_detector = False
- self.table_contact_detector = False
- self.floor_contact_detector = False
- self.radius = 0.025
- self.min_ball_x_pos = 100
- self.hit_contact_detector = False
- self.net_contact_detector = False
- self.ratio = 1
- self.lowest_z = 100
- self.target_flag = False
- self.dist_target_virtual = 100
- self.ball_z_pos_lowest = 100
- self.hitting_flag = False
- self.hitting_time_point = None
- self.ctxt_dim = None
- self.context_range_bounds = None
- # self.ctxt_out_of_range_punishment = None
- # self.ctxt_in_side_of_range_punishment = None
- #
- # def check_where_invalid(self, ctxt, context_range_bounds, set_to_valid_region=False):
- # idx_max = []
- # idx_min = []
- # for dim in range(self.ctxt_dim):
- # min_dim = context_range_bounds[0][dim]
- # max_dim = context_range_bounds[1][dim]
- # idx_max_c = np.where(ctxt[:, dim] > max_dim)[0]
- # idx_min_c = np.where(ctxt[:, dim] < min_dim)[0]
- # if set_to_valid_region:
- # if idx_max_c.shape[0] != 0:
- # ctxt[idx_max_c, dim] = max_dim
- # if idx_min_c.shape[0] != 0:
- # ctxt[idx_min_c, dim] = min_dim
- # idx_max.append(idx_max_c)
- # idx_min.append(idx_min_c)
- # return idx_max, idx_min, ctxt
-
- def check_valid(self, scale, context_range_bounds):
-
- min_dim = context_range_bounds[0][0]
- max_dim = context_range_bounds[1][0]
- valid = (scale < max_dim) and (scale > min_dim)
- return valid
-
- @classmethod
- def goal_distance(cls, goal_a, goal_b):
- assert goal_a.shape == goal_b.shape
- return np.linalg.norm(goal_a - goal_b, axis=-1)
-
- def refresh_highest_reward(self):
- if self.total_reward >= self.highest_reward:
- self.highest_reward = self.total_reward
-
- def duration_valid(self):
- pass
-
- def huge_value_unstable(self):
- self.total_reward += -10
- self.highest_reward = -1
-
- def context_valid(self, context):
- valid = self.check_valid(context.copy(), context_range_bounds=self.context_range_bounds)
- # when using dirac punishments
- # if the context is inside the valid range add 1, otherwise add 0
- if valid:
- self.total_reward += 1 # If Action Valid and Context Valid, total_reward = 0
- else:
- self.total_reward += 0
- self.refresh_highest_reward()
-
- def action_valid(self, durations=None):
- """Ensure the execution of the robot movement with parameters which are in a valid domain.
-
- Time should always be positive,
- the joint positions of the robot should lie within [−π, π].
- If all parameters are valid, the robot gets a zero score,
- otherwise it gets a negative score proportional to how far it is beyond the valid parameter domain.
-
- Returns:
- rewards: if valid, reward is equal to 0.
- if not valid, reward is negative and proportional to the distance beyond the valid parameter domain
- """
- assert durations.shape[0] == 2, "durations should be a np.ndarray of shape (2,)"
- # pre_duration = durations[0]
- hit_duration = durations[1]
- # pre_duration_thres = 0.01
- hit_duration_thres = 1
- # self.goal_achievement = np.all(
- # [(pre_duration > pre_duration_thres), (hit_duration > hit_duration_thres), (0.3 < pre_duration < 0.6)])
- self.goal_achievement = (hit_duration > hit_duration_thres)
- if self.goal_achievement:
- self.total_reward = -1
- self.goal_achievement = True
- else:
- # self.total_reward += -1 * ((np.abs(pre_duration - pre_duration_thres) * int(
- # pre_duration < pre_duration_thres) + np.abs(hit_duration - hit_duration_thres) * int(
- # hit_duration < hit_duration_thres)) * 10)
- self.total_reward = -1 * ((np.abs(hit_duration - hit_duration_thres) * int(
- hit_duration < hit_duration_thres)) * 10)
- self.total_reward += -1
- self.goal_achievement = False
- self.refresh_highest_reward()
-
- def motion_penalty(self, action, high_motion_penalty):
- """Protects the robot from high acceleration and dangerous movement.
- """
- if not high_motion_penalty:
- reward_ctrl = - 0.05 * np.square(action).sum()
- else:
- reward_ctrl = - 0.075 * np.square(action).sum()
- self.total_reward += reward_ctrl
- self.refresh_highest_reward()
- self.goal_achievement = True
-
- def hitting(self, env): # , target_ball_pos, racket_center_pos, hit_contact_detector=False
- """Hitting reward calculation
-
- If the racket hits the ball, the reward is 2 * (1 - tanh(|shortest_hitting_dist|)), i.e. close to 2 for a clean hit.
- Otherwise the distance between the racket center and the ball center is tracked and the
- reward is 0.2 * (1 - tanh(shortest_hitting_dist)), i.e. bounded by [0, 0.2].
-
-
- Args:
- env:
-
- Returns:
-
- """
-
- hit_contact_obj = ["target_ball", "bat"]
- target_ball_pos = env.target_ball_pos
- racket_center_pos = env.racket_center_pos
- # hit contact detection
- # Record the hitting history
- self.hitting_flag = False
- if not self.hit_contact_detector:
- self.hit_contact_detector = self.contact_detection(env, hit_contact_obj)
- if self.hit_contact_detector:
- print("First time detect hitting")
- self.hitting_flag = True
- if self.hit_contact_detector:
-
- # TODO
- dist = self.goal_distance(target_ball_pos, racket_center_pos)
- if dist < 0:
- dist = 0
- # print("goal distance is:", dist)
- if dist <= self.shortest_hitting_dist:
- self.shortest_hitting_dist = dist
- # print("shortest_hitting_dist is:", self.shortest_hitting_dist)
- # Keep the shortest hitting distance.
- dist_reward = 2 * (1 - np.tanh(np.abs(self.shortest_hitting_dist)))
-
- # TODO sparse
- # dist_reward = 2
-
- self.total_reward += dist_reward
- self.goal_achievement = True
-
- # if self.hitting_time_point is not None and self.hitting_time_point > 600:
- # self.total_reward += 1
-
- else:
- dist = self.goal_distance(target_ball_pos, racket_center_pos)
- if dist <= self.shortest_hitting_dist:
- self.shortest_hitting_dist = dist
- dist_reward = 1 - np.tanh(self.shortest_hitting_dist)
- reward = 0.2 * dist_reward # the racket did not hit the ball, so scale the proximity reward down by 0.2
- self.total_reward += reward
- self.goal_achievement = False
-
- self.refresh_highest_reward()
-
- @classmethod
- def relu(cls, x):
- return np.maximum(0, x)
-
- # def right_table_contact(self, env):
- # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
- # if env.target_ball_pos[0] >= 0 and env.target_ball_pos[2] >= 0.7:
- # # update right court contact detection
- # if not self.right_court_contact_detector:
- # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
- # if self.right_court_contact_detector:
- # self.contact_x_pos = env.target_ball_pos[0]
- # if self.right_court_contact_detector:
- # self.total_reward += 1 - norm(0.685, 1).pdf(self.contact_x_pos) # x axis middle of right table
- # self.goal_achievement = False
- # else:
- # self.total_reward += 1
- # self.goal_achievement = True
- # # else:
- # # self.total_reward += 0
- # # self.goal_achievement = False
- # self.refresh_highest_reward()
-
- # def net_contact(self, env):
- # net_contact_obj = ["target_ball", "table_tennis_net"]
- # # net_contact_detector = self.contact_detection(env, net_contact_obj)
- # # ball_x_pos = env.target_ball_pos[0]
- # # if self.min_ball_x_pos >= ball_x_pos:
- # # self.min_ball_x_pos = ball_x_pos
- # # table_left_edge_x_pos = -1.37
- # # if np.abs(ball_x_pos) <= 0.01: # x threshold of net
- # # if self.lowest_z >= env.target_ball_pos[2]:
- # # self.lowest_z = env.target_ball_pos[2]
- # # # construct a gaussian distribution of z
- # # z_reward = 4 - norm(0, 0.1).pdf(self.lowest_z - 0.07625) # maximum 4
- # # self.total_reward += z_reward
- # # self.total_reward += 2 - np.minimum(1, self.relu(np.abs(self.min_ball_x_pos)))
- # if not self.net_contact_detector:
- # self.net_contact_detector = self.contact_detection(env, net_contact_obj)
- # if self.net_contact_detector:
- # self.total_reward += 0 # very high cost
- # self.goal_achievement = False
- # else:
- # self.total_reward += 1
- # self.goal_achievement = True
- # self.refresh_highest_reward()
-
- # def landing_on_opponent_court(self, env):
- # # Very sparse reward
- # # don't contact the right side court
- # # right_court_contact_obj = ["target_ball", "table_tennis_table_right_side"]
- # # right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
- # left_court_contact_obj = ["target_ball", "table_tennis_table_left_side"]
- # # left_court_contact_detector = self.contact_detection(env, left_court_contact_obj)
- # # record the contact history
- # # if not self.right_court_contact_detector:
- # # self.right_court_contact_detector = self.contact_detection(env, right_court_contact_obj)
- # if not self.table_contact_detector:
- # self.table_contact_detector = self.contact_detection(env, left_court_contact_obj)
- #
- # dist_left_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_up_corner"))
- # dist_middle_up_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("middle_up_corner"))
- # dist_left_down_corner = self.goal_distance(env.target_ball_pos, env.sim.data.get_site_xpos("left_down_corner"))
- # dist_middle_down_corner = self.goal_distance(env.target_ball_pos,
- # env.sim.data.get_site_xpos("middle_down_corner"))
- # dist_array = np.array(
- # [dist_left_up_corner, dist_middle_up_corner, dist_left_down_corner, dist_middle_down_corner])
- # dist_corner = np.amin(dist_array)
- # if self.lowest_corner_dist >= dist_corner:
- # self.lowest_corner_dist = dist_corner
- #
- # right_contact_cost = 1
- # left_contact_reward = 2
- # dist_left_table_reward = (2 - np.tanh(self.lowest_corner_dist))
- # # TODO Try multi dimensional gaussian distribution
- # # contact only the left side court
- # if self.right_court_contact_detector:
- # self.total_reward += 0
- # self.goal_achievement = False
- # if self.table_contact_detector:
- # self.total_reward += left_contact_reward
- # self.goal_achievement = False
- # else:
- # self.total_reward += dist_left_table_reward
- # self.goal_achievement = False
- # else:
- # self.total_reward += right_contact_cost
- # if self.table_contact_detector:
- # self.total_reward += left_contact_reward
- # self.goal_achievement = True
- # else:
- # self.total_reward += dist_left_table_reward
- # self.goal_achievement = False
- # self.refresh_highest_reward()
- # # if self.left_court_contact_detector and not self.right_court_contact_detector:
- # # self.total_reward += self.ratio * left_contact_reward
- # # print("only left court reward return!!!!!!!!!")
- # # print("contact only left court!!!!!!")
- # # self.goal_achievement = True
- # # # no contact with table
- # # elif not self.right_court_contact_detector and not self.left_court_contact_detector:
- # # self.total_reward += 0 + self.ratio * dist_left_table_reward
- # # self.goal_achievement = False
- # # # contact both side
- # # elif self.right_court_contact_detector and self.left_court_contact_detector:
- # # self.total_reward += self.ratio * (left_contact_reward - right_contact_cost) # cost of contact of right court
- # # self.goal_achievement = False
- # # # contact only the right side court
- # # elif self.right_court_contact_detector and not self.left_court_contact_detector:
- # # self.total_reward += 0 + self.ratio * (
- # # dist_left_table_reward - right_contact_cost) # cost of contact of right court
- # # self.goal_achievement = False
-
- def target_achievement(self, env):
- target_coordinate = np.array([-0.5, -0.5])
- # net_contact_obj = ["target_ball", "table_tennis_net"]
- table_contact_obj = ["target_ball", "table_tennis_table"]
- floor_contact_obj = ["target_ball", "floor"]
-
- if 0.78 < env.target_ball_pos[2] < 0.8:
- dist_target_virtual = np.linalg.norm(env.target_ball_pos[:2] - target_coordinate)
- if self.dist_target_virtual > dist_target_virtual:
- self.dist_target_virtual = dist_target_virtual
- if -0.07 < env.target_ball_pos[0] < 0.07 and env.sim.data.get_joint_qvel('tar:x') < 0:
- if self.ball_z_pos_lowest > env.target_ball_pos[2]:
- self.ball_z_pos_lowest = env.target_ball_pos[2].copy()
- # if not self.net_contact_detector:
- # self.net_contact_detector = self.contact_detection(env, net_contact_obj)
- if not self.table_contact_detector:
- self.table_contact_detector = self.contact_detection(env, table_contact_obj)
- if not self.floor_contact_detector:
- self.floor_contact_detector = self.contact_detection(env, floor_contact_obj)
- if not self.target_flag:
- # Table Contact Reward.
- if self.table_contact_detector:
- self.total_reward += 1
- # only update when the first contact because of the flag
- contact_coordinate = env.target_ball_pos[:2].copy()
- print("contact table ball coordinate: ", env.target_ball_pos)
- logging.info("contact table ball coordinate: {}".format(env.target_ball_pos))
- dist_target = np.linalg.norm(contact_coordinate - target_coordinate)
- self.total_reward += (1 - np.tanh(dist_target)) * 2
- self.target_flag = True
- # Net Contact Reward. Precondition: Table Contact exits.
- if contact_coordinate[0] < 0:
- print("left table contact")
- logging.info("~~~~~~~~~~~~~~~left table contact~~~~~~~~~~~~~~~")
- self.total_reward += 1
- # TODO Z coordinate reward
- # self.total_reward += np.maximum(np.tanh(self.ball_z_pos_lowest), 0)
- self.goal_achievement = True
- else:
- print("right table contact")
- logging.info("~~~~~~~~~~~~~~~right table contact~~~~~~~~~~~~~~~")
- self.total_reward += 0
- self.goal_achievement = False
- # if self.net_contact_detector:
- # self.total_reward += 0
- # self.goal_achievement = False
- # else:
- # self.total_reward += 1
- # self.goal_achievement = False
- # Floor Contact Reward. Precondition: Table Contact exits.
- elif self.floor_contact_detector:
- self.total_reward += (1 - np.tanh(self.dist_target_virtual))
- self.target_flag = True
- self.goal_achievement = False
- # No Contact of Floor or Table, flying
- else:
- pass
- # else:
- # print("Flag is True already")
- self.refresh_highest_reward()
-
- def distance_to_target(self):
- pass
-
- @classmethod
- def contact_detection(cls, env, goal_contact):
- for i in range(env.sim.data.ncon):
- contact = env.sim.data.contact[i]
- achieved_geom1_name = env.sim.model.geom_id2name(contact.geom1)
- achieved_geom2_name = env.sim.model.geom_id2name(contact.geom2)
- if np.all([(achieved_geom1_name in goal_contact), (achieved_geom2_name in goal_contact)]):
- print("contact of " + achieved_geom1_name + " " + achieved_geom2_name)
- return True
- # only report "no contact" after all active contacts have been checked
- return False
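For orientation, the reward stages of HierarchicalRewardTableTennis are meant to be invoked in order over one episode. A rough sketch follows, assuming a hypothetical env that exposes target_ball_pos, racket_center_pos and the MuJoCo sim; the durations, context values and episode_length are placeholders, not this repository's exact training loop:

import numpy as np

reward_fn = HierarchicalRewardTableTennis()
reward_fn.ctxt_dim = 2
reward_fn.context_range_bounds = [[-1.2, -0.6], [-0.2, 0.0]]

reward_fn.action_valid(np.array([0.3, 1.2]))   # Step 1: hit duration above threshold -> running total -1
reward_fn.context_valid(np.array([-0.7]))      # a context inside the valid range adds +1, back to 0
for _ in range(episode_length):                # episode_length, env and action are placeholders
    env.step(action)
    reward_fn.hitting(env)                     # Step 2: proximity / hitting reward
    reward_fn.target_achievement(env)          # Step 3: landing reward near the desired target
episodic_return = reward_fn.highest_reward     # the class keeps the highest running total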
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py
deleted file mode 100644
index 6e6aa32..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/rewards/rewards.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# Copyright 2017 The dm_control Authors.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-# """Soft indicator function evaluating whether a number is within bounds."""
-#
-# from __future__ import absolute_import
-# from __future__ import division
-# from __future__ import print_function
-
-# Internal dependencies.
-import numpy as np
-
-# The value returned by tolerance() at `margin` distance from `bounds` interval.
-_DEFAULT_VALUE_AT_MARGIN = 0.1
-
-
-def _sigmoids(x, value_at_1, sigmoid):
- """Returns 1 when `x` == 0, between 0 and 1 otherwise.
-
- Args:
- x: A scalar or numpy array.
- value_at_1: A float between 0 and 1 specifying the output when `x` == 1.
- sigmoid: String, choice of sigmoid type.
-
- Returns:
- A numpy array with values between 0.0 and 1.0.
-
- Raises:
- ValueError: If not 0 < `value_at_1` < 1, except for `linear`, `cosine` and
- `quadratic` sigmoids which allow `value_at_1` == 0.
- ValueError: If `sigmoid` is of an unknown type.
- """
- if sigmoid in ('cosine', 'linear', 'quadratic'):
- if not 0 <= value_at_1 < 1:
- raise ValueError('`value_at_1` must be nonnegative and smaller than 1, '
- 'got {}.'.format(value_at_1))
- else:
- if not 0 < value_at_1 < 1:
- raise ValueError('`value_at_1` must be strictly between 0 and 1, '
- 'got {}.'.format(value_at_1))
-
- if sigmoid == 'gaussian':
- scale = np.sqrt(-2 * np.log(value_at_1))
- return np.exp(-0.5 * (x*scale)**2)
-
- elif sigmoid == 'hyperbolic':
- scale = np.arccosh(1/value_at_1)
- return 1 / np.cosh(x*scale)
-
- elif sigmoid == 'long_tail':
- scale = np.sqrt(1/value_at_1 - 1)
- return 1 / ((x*scale)**2 + 1)
-
- elif sigmoid == 'cosine':
- scale = np.arccos(2*value_at_1 - 1) / np.pi
- scaled_x = x*scale
- return np.where(abs(scaled_x) < 1, (1 + np.cos(np.pi*scaled_x))/2, 0.0)
-
- elif sigmoid == 'linear':
- scale = 1-value_at_1
- scaled_x = x*scale
- return np.where(abs(scaled_x) < 1, 1 - scaled_x, 0.0)
-
- elif sigmoid == 'quadratic':
- scale = np.sqrt(1-value_at_1)
- scaled_x = x*scale
- return np.where(abs(scaled_x) < 1, 1 - scaled_x**2, 0.0)
-
- elif sigmoid == 'tanh_squared':
- scale = np.arctanh(np.sqrt(1-value_at_1))
- return 1 - np.tanh(x*scale)**2
-
- else:
- raise ValueError('Unknown sigmoid type {!r}.'.format(sigmoid))
-
-
-def tolerance(x, bounds=(0.0, 0.0), margin=0.0, sigmoid='gaussian',
- value_at_margin=_DEFAULT_VALUE_AT_MARGIN):
- """Returns 1 when `x` falls inside the bounds, between 0 and 1 otherwise.
-
- Args:
- x: A scalar or numpy array.
- bounds: A tuple of floats specifying inclusive `(lower, upper)` bounds for
- the target interval. These can be infinite if the interval is unbounded
- at one or both ends, or they can be equal to one another if the target
- value is exact.
- margin: Float. Parameter that controls how steeply the output decreases as
- `x` moves out-of-bounds.
- * If `margin == 0` then the output will be 0 for all values of `x`
- outside of `bounds`.
- * If `margin > 0` then the output will decrease sigmoidally with
- increasing distance from the nearest bound.
- sigmoid: String, choice of sigmoid type. Valid values are: 'gaussian',
- 'linear', 'hyperbolic', 'long_tail', 'cosine', 'tanh_squared'.
- value_at_margin: A float between 0 and 1 specifying the output value when
- the distance from `x` to the nearest bound is equal to `margin`. Ignored
- if `margin == 0`.
-
- Returns:
- A float or numpy array with values between 0.0 and 1.0.
-
- Raises:
- ValueError: If `bounds[0] > bounds[1]`.
- ValueError: If `margin` is negative.
- """
- lower, upper = bounds
- if lower > upper:
- raise ValueError('Lower bound must be <= upper bound.')
- if margin < 0:
- raise ValueError('`margin` must be non-negative.')
-
- in_bounds = np.logical_and(lower <= x, x <= upper)
- if margin == 0:
- value = np.where(in_bounds, 1.0, 0.0)
- else:
- d = np.where(x < lower, lower - x, x - upper) / margin
- value = np.where(in_bounds, 1.0, _sigmoids(d, value_at_margin, sigmoid))
-
- return float(value) if np.isscalar(x) else value
-
-
-
-
-
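A short usage sketch for the tolerance helper above; the input values are chosen purely for illustration:

import numpy as np

print(tolerance(0.05, bounds=(0.0, 0.1)))             # inside the bounds -> 1.0
print(tolerance(0.2, bounds=(0.0, 0.1)))              # outside the bounds with margin=0 -> hard cutoff to 0.0
print(tolerance(0.3, bounds=(0.0, 0.1), margin=0.2))  # at distance == margin -> value_at_margin (default 0.1)
# element-wise on arrays, with a different sigmoid shape
print(tolerance(np.linspace(-0.5, 0.5, 5), bounds=(0.0, 0.1), margin=0.2, sigmoid='long_tail'))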
diff --git a/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py b/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py
deleted file mode 100644
index fa308e3..0000000
--- a/alr_envs/alr/mujoco/gym_table_tennis/utils/util.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import json
-import yaml
-import xml.etree.ElementTree as ET
-from collections import OrderedDict
-from pathlib import Path
-
-
-def read_json(fname):
- fname = Path(fname)
- with fname.open('rt') as handle:
- return json.load(handle, object_hook=OrderedDict)
-
-
-def write_json(content, fname):
- fname = Path(fname)
- with fname.open('wt') as handle:
- json.dump(content, handle, indent=4, sort_keys=False)
-
-
-def read_yaml(fname):
- fname = Path(fname)
- with fname.open('rt') as handle:
- return yaml.load(handle, Loader=yaml.FullLoader)
-
-
-def write_yaml(content, fname):
- fname = Path(fname)
- with fname.open('wt') as handle:
- yaml.dump(content, handle)
-
-
-def config_save(dir_path, config):
- dir_path = Path(dir_path)
- config_path_json = dir_path / "config.json"
- config_path_yaml = dir_path / "config.yml"
- # .json and .yml file,save 2 version of configuration.
- write_json(config, config_path_json)
- write_yaml(config, config_path_yaml)
-
-
-def change_kp_in_xml(kp_list,
- model_path="/home/zhou/slow/table_tennis_rl/simulation/gymTableTennis/gym_table_tennis/simple_reacher/robotics/assets/table_tennis/right_arm_actuator.xml"):
- tree = ET.parse(model_path)
- root = tree.getroot()
- # for actuator in root.find("actuator"):
- for position, kp in zip(root.iter('position'), kp_list):
- position.set("kp", str(kp))
- tree.write(model_path)
-
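The removed I/O helpers compose as follows; a small sketch with a hypothetical output directory:

from pathlib import Path

cfg = {"env": "table_tennis", "seed": 0}
out_dir = Path("/tmp/tt_experiment")
out_dir.mkdir(parents=True, exist_ok=True)
config_save(out_dir, cfg)                          # writes both config.json and config.yml
assert read_json(out_dir / "config.json") == cfg   # json round-trips via an OrderedDict
assert read_yaml(out_dir / "config.yml") == cfg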
diff --git a/alr_envs/alr/mujoco/table_tennis/__init__.py b/alr_envs/alr/mujoco/table_tennis/__init__.py
deleted file mode 100644
index c5e6d2f..0000000
--- a/alr_envs/alr/mujoco/table_tennis/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .mp_wrapper import MPWrapper
diff --git a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py b/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
deleted file mode 100644
index 40e4252..0000000
--- a/alr_envs/alr/mujoco/table_tennis/mp_wrapper.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- @property
- def context_mask(self) -> np.ndarray:
- # TODO: @Max Filter observations correctly
- return np.hstack([
- [False] * 7, # Joint Pos
- [True] * 2, # Ball pos
- [True] * 2 # goal pos
- ])
-
- @property
- def start_pos(self):
- return self.env.init_qpos_tt
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qpos[:7].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.sim.data.qvel[:7].copy()
-
- @property
- def goal_pos(self):
- # TODO: @Max I think the default value of returning to the start is reasonable here
- raise ValueError("Goal position is not available and has to be learnt based on the environment.")
-
- @property
- def dt(self) -> Union[float, int]:
- return self.env.dt
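To make the mask above concrete, a black-box wrapper would typically keep only the masked entries of the flat observation; a sketch with made-up observation values:

import numpy as np

obs = np.arange(11.0)                                    # 7 joint positions, 2 ball coords, 2 goal coords
mask = np.hstack([[False] * 7, [True] * 2, [True] * 2])
context = obs[mask]                                      # ball position and goal position only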
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_gym.py b/alr_envs/alr/mujoco/table_tennis/tt_gym.py
deleted file mode 100644
index 33db936..0000000
--- a/alr_envs/alr/mujoco/table_tennis/tt_gym.py
+++ /dev/null
@@ -1,184 +0,0 @@
-import os
-
-import numpy as np
-import mujoco_py
-from gym import utils, spaces
-from gym.envs.mujoco import MujocoEnv
-
-from alr_envs.alr.mujoco.table_tennis.tt_utils import ball_init
-from alr_envs.alr.mujoco.table_tennis.tt_reward import TT_Reward
-
-#TODO: Check for simulation stability. Make sure the code runs even for sim crash
-
-MAX_EPISODE_STEPS = 1375 # (1.25 + 1.5) /0.002
-BALL_NAME_CONTACT = "target_ball_contact"
-BALL_NAME = "target_ball"
-TABLE_NAME = 'table_tennis_table'
-FLOOR_NAME = 'floor'
-PADDLE_CONTACT_1_NAME = 'bat'
-PADDLE_CONTACT_2_NAME = 'bat_back'
-RACKET_NAME = 'bat'
-# CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.6]])
-CONTEXT_RANGE_BOUNDS_2DIM = np.array([[-1.2, -0.6], [-0.2, 0.0]])
-CONTEXT_RANGE_BOUNDS_4DIM = np.array([[-1.35, -0.75, -1.25, -0.75], [-0.1, 0.75, -0.1, 0.75]])
-
-
-class TTEnvGym(MujocoEnv, utils.EzPickle):
-
- def __init__(self, ctxt_dim=2, fixed_goal=False):
- model_path = os.path.join(os.path.dirname(__file__), "xml", 'table_tennis_env.xml')
-
- self.ctxt_dim = ctxt_dim
- self.fixed_goal = fixed_goal
- if ctxt_dim == 2:
- self.context_range_bounds = CONTEXT_RANGE_BOUNDS_2DIM
- if self.fixed_goal:
- self.goal = np.array([-1, -0.1, 0])
- else:
- self.goal = np.zeros(3) # 2 x,y + 1z
- elif ctxt_dim == 4:
- self.context_range_bounds = CONTEXT_RANGE_BOUNDS_4DIM
- self.goal = np.zeros(3)
- else:
- raise ValueError("either 2 or 4 dimensional Contexts available")
-
- # has no effect as it is overwritten in init of super
- # action_space_low = np.array([-2.6, -2.0, -2.8, -0.9, -4.8, -1.6, -2.2])
- # action_space_high = np.array([2.6, 2.0, 2.8, 3.1, 1.3, 1.6, 2.2])
- # self.action_space = spaces.Box(low=action_space_low, high=action_space_high, dtype='float64')
-
- self.time_steps = 0
- self.init_qpos_tt = np.array([0, 0, 0, 1.5, 0, 0, 1.5, 0, 0, 0])
- self.init_qvel_tt = np.zeros(10)
-
- self.reward_func = TT_Reward(self.ctxt_dim)
- self.ball_landing_pos = None
- self.hit_ball = False
- self.ball_contact_after_hit = False
- self._ids_set = False
- super(TTEnvGym, self).__init__(model_path=model_path, frame_skip=1)
- self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
- self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
- self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
- self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
- self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
- self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
- self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
-
-
- def _set_ids(self):
- self.ball_id = self.sim.model._body_name2id[BALL_NAME] # find the proper -> not protected func.
- self.table_contact_id = self.sim.model._geom_name2id[TABLE_NAME]
- self.floor_contact_id = self.sim.model._geom_name2id[FLOOR_NAME]
- self.paddle_contact_id_1 = self.sim.model._geom_name2id[PADDLE_CONTACT_1_NAME] # check if we need both or only this
- self.paddle_contact_id_2 = self.sim.model._geom_name2id[PADDLE_CONTACT_2_NAME] # check if we need both or only this
- self.racket_id = self.sim.model._geom_name2id[RACKET_NAME]
- self.ball_contact_id = self.sim.model._geom_name2id[BALL_NAME_CONTACT]
- self._ids_set = True
-
- def _get_obs(self):
- ball_pos = self.sim.data.body_xpos[self.ball_id][:2].copy()
- goal_pos = self.goal[:2].copy()
- obs = np.concatenate([self.sim.data.qpos[:7].copy(), # 7 joint positions
- ball_pos,
- goal_pos])
- return obs
-
- def sample_context(self):
- return self.np_random.uniform(self.context_range_bounds[0], self.context_range_bounds[1], size=self.ctxt_dim)
-
- def reset_model(self):
- self.set_state(self.init_qpos_tt, self.init_qvel_tt) # reset to initial sim state
- self.time_steps = 0
- self.ball_landing_pos = None
- self.hit_ball = False
- self.ball_contact_after_hit = False
- if self.fixed_goal:
- self.goal = self.goal[:2]
- else:
- self.goal = self.sample_context()[:2]
- if self.ctxt_dim == 2:
- initial_ball_state = ball_init(random=False) # fixed velocity, fixed position
- elif self.ctxt_dim == 4:
- initial_ball_state = ball_init(random=False) # TODO: proper 4-dim context initialization is not implemented yet
-
- self.sim.data.set_joint_qpos('tar:x', initial_ball_state[0])
- self.sim.data.set_joint_qpos('tar:y', initial_ball_state[1])
- self.sim.data.set_joint_qpos('tar:z', initial_ball_state[2])
-
- self.sim.data.set_joint_qvel('tar:x', initial_ball_state[3])
- self.sim.data.set_joint_qvel('tar:y', initial_ball_state[4])
- self.sim.data.set_joint_qvel('tar:z', initial_ball_state[5])
-
- z_extended_goal_pos = np.concatenate((self.goal[:2], [0.77]))
- self.goal = z_extended_goal_pos
- self.sim.model.body_pos[5] = self.goal[:3] # Desired Landing Position, Yellow
- self.sim.model.body_pos[3] = np.array([0, 0, 0.5]) # Outgoing Ball Landing Position, Green
- self.sim.model.body_pos[4] = np.array([0, 0, 0.5]) # Incoming Ball Landing Position, Red
- self.sim.forward()
-
- self.reward_func.reset(self.goal) # reset the reward function
- self.n_step = 0
- return self._get_obs()
-
- def _contact_checker(self, id_1, id_2):
- for coni in range(0, self.sim.data.ncon):
- con = self.sim.data.contact[coni]
- collision = con.geom1 == id_1 and con.geom2 == id_2
- collision_trans = con.geom1 == id_2 and con.geom2 == id_1
- if collision or collision_trans:
- return True
- return False
-
- def step(self, action):
- if not self._ids_set:
- self._set_ids()
- done = False
- episode_end = self.time_steps + 1 >= MAX_EPISODE_STEPS
- if not self.hit_ball:
- self.hit_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_1) # check for one side
- if not self.hit_ball:
- self.hit_ball = self._contact_checker(self.ball_contact_id, self.paddle_contact_id_2) # check for other side
- if self.hit_ball:
- if not self.ball_contact_after_hit:
- if self._contact_checker(self.ball_contact_id, self.floor_contact_id): # first check contact with floor
- self.ball_contact_after_hit = True
- self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
- elif self._contact_checker(self.ball_contact_id, self.table_contact_id): # second check contact with table
- self.ball_contact_after_hit = True
- self.ball_landing_pos = self.sim.data.body_xpos[self.ball_id]
- c_ball_pos = self.sim.data.body_xpos[self.ball_id]
- racket_pos = self.sim.data.geom_xpos[self.racket_id] # TODO: use this to reach out the position of the paddle?
- if self.ball_landing_pos is not None:
- done = True
- episode_end = True
- reward = self.reward_func.get_reward(episode_end, c_ball_pos, racket_pos, self.hit_ball, self.ball_landing_pos)
- self.time_steps += 1
- # gravity compensation on joints:
- #action += self.sim.data.qfrc_bias[:7].copy()
- try:
- self.do_simulation(action, self.frame_skip)
- except mujoco_py.MujocoException:
- print('Simulation became unstable, ending the episode early')
- done = True
- reward = -25
- ob = self._get_obs()
- info = {"hit_ball": self.hit_ball,
- "q_pos": np.copy(self.sim.data.qpos[:7]),
- "ball_pos": np.copy(self.sim.data.qpos[7:])}
- self.n_step += 1
- return ob, reward, done, info # might add some information here ....
-
- def set_context(self, context):
- old_state = self.sim.get_state()
- qpos = old_state.qpos.copy()
- qvel = old_state.qvel.copy()
- self.set_state(qpos, qvel)
- self.goal = context
- z_extended_goal_pos = np.concatenate((self.goal[:self.ctxt_dim], [0.77]))
- if self.ctxt_dim == 4:
- z_extended_goal_pos = np.concatenate((z_extended_goal_pos, [0.77]))
- self.goal = z_extended_goal_pos
- self.sim.model.body_pos[5] = self.goal[:3] # TODO: missing: setting the desired incoming landing position
- self.sim.forward()
- return self._get_obs()
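A rollout sketch for the deleted TTEnvGym; it assumes the MuJoCo XML assets are still available and uses random actions purely for illustration:

env = TTEnvGym(ctxt_dim=2, fixed_goal=False)
obs = env.reset()                                # samples a new goal and resets the reward function
done = False
while not done:
    action = env.action_space.sample()           # random 7-dof action, illustration only
    obs, reward, done, info = env.step(action)   # reward stays 0 until the episode ends or the sim crashes
print(info["hit_ball"], reward)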
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_reward.py b/alr_envs/alr/mujoco/table_tennis/tt_reward.py
deleted file mode 100644
index 0e1bebf..0000000
--- a/alr_envs/alr/mujoco/table_tennis/tt_reward.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import numpy as np
-
-
-class TT_Reward:
-
- def __init__(self, ctxt_dim):
- self.ctxt_dim = ctxt_dim
- self.c_goal = None # current desired landing point
- self.c_ball_traj = []
- self.c_racket_traj = []
- self.constant = 8
-
- def get_reward(self, episode_end, ball_position, racket_pos, hit_ball, ball_landing_pos):
- self.c_ball_traj.append(ball_position.copy())
- self.c_racket_traj.append(racket_pos.copy())
- if not episode_end:
- return 0
- else:
- # # seems to work for episodic case
- min_r_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
- if not hit_ball:
- return 0.2 * (1 - np.tanh(min_r_b_dist**2))
- else:
- if ball_landing_pos is None:
- min_b_des_b_dist = np.min(np.linalg.norm(np.array(self.c_ball_traj)[:,:2] - self.c_goal[:2], axis=1))
- return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + (1 - np.tanh(min_b_des_b_dist**2))
- else:
- min_b_des_b_land_dist = np.linalg.norm(self.c_goal[:2] - ball_landing_pos[:2])
- over_net_bonus = int(ball_landing_pos[0] < 0)
- return 2 * (1 - np.tanh(min_r_b_dist ** 2)) + 4 * (1 - np.tanh(min_b_des_b_land_dist ** 2)) + over_net_bonus
-
-
- # if not hited_ball:
- # min_r_b_dist = 1 + np.min(np.linalg.norm(np.array(self.c_ball_traj) - np.array(self.c_racket_traj), axis=1))
- # return -min_r_b_dist
- # else:
- # if ball_landing_pos is None:
- # dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_position), 0.75)/self.constant
- # else:
- # dist_to_des_pos = 1-np.power(np.linalg.norm(self.c_goal - ball_landing_pos), 0.75)/self.constant
- # if dist_to_des_pos < -0.2:
- # dist_to_des_pos = -0.2
- # return -dist_to_des_pos
-
- def reset(self, goal):
- self.c_goal = goal.copy()
- self.c_ball_traj = []
- self.c_racket_traj = []
\ No newline at end of file
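A toy evaluation of TT_Reward above; the trajectory points are fabricated purely to show the call pattern:

import numpy as np

rf = TT_Reward(ctxt_dim=2)
rf.reset(np.array([-0.5, -0.5, 0.77]))            # desired landing position

# intermediate steps only record the trajectory and return 0
r = rf.get_reward(False, np.array([0.0, 0.0, 1.0]), np.array([0.2, 0.0, 1.0]), False, None)
assert r == 0

# episode end without a hit: only the racket-to-ball proximity term, bounded by 0.2
r = rf.get_reward(True, np.array([0.1, 0.0, 1.0]), np.array([0.2, 0.0, 1.0]), False, None)
print(r)                                          # 0.2 * (1 - tanh(min_dist ** 2)), here about 0.198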
diff --git a/alr_envs/alr/mujoco/table_tennis/tt_utils.py b/alr_envs/alr/mujoco/table_tennis/tt_utils.py
deleted file mode 100644
index 04a1b97..0000000
--- a/alr_envs/alr/mujoco/table_tennis/tt_utils.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import numpy as np
-
-
-def ball_init(random=False, context_range=None):
- if random:
- dx = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
- dy = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
- dz = np.random.uniform(-0.1, 0.1) # TODO: clarify these numbers?
-
- v_x = np.random.uniform(1.7, 1.8)
- v_y = np.random.uniform(0.7, 0.8)
- v_z = np.random.uniform(0.1, 0.2)
- else:
- dx = 1
- dy = 0
- dz = 0.05
-
- v_x = 2.5
- v_y = 2
- v_z = 0.5
-
- initial_x = 0 + dx - 1.2
- initial_y = -0.2 + dy - 0.6
- initial_z = 0.3 + dz + 1.5
- initial_ball_state = np.array([initial_x, initial_y, initial_z, v_x, v_y, v_z])
- return initial_ball_state
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
deleted file mode 100644
index ec94d96..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_7_motor_actuator.xml
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
deleted file mode 100644
index 7e04c28..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_barrett_wam_7dof_right.xml
+++ /dev/null
@@ -1,103 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
deleted file mode 100644
index c313489..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_table.xml
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml b/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
deleted file mode 100644
index 19543e2..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/include_target_ball.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml b/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
deleted file mode 100644
index 25d0366..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/right_arm_actuator.xml
+++ /dev/null
@@ -1,47 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml b/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
deleted file mode 100644
index e349992..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/shared.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml b/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
deleted file mode 100644
index 9609a6e..0000000
--- a/alr_envs/alr/mujoco/table_tennis/xml/table_tennis_env.xml
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py b/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
deleted file mode 100644
index 63432a7..0000000
--- a/alr_envs/alr/mujoco/walker_2d_jump/mp_wrapper.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from typing import Tuple, Union
-
-import numpy as np
-
-from alr_envs.black_box.raw_interface_wrapper import RawInterfaceWrapper
-
-
-class MPWrapper(RawInterfaceWrapper):
-
- @property
- def context_mask(self):
- return np.hstack([
- [False] * 17,
- [True] # goal pos
- ])
-
- @property
- def current_pos(self) -> Union[float, int, np.ndarray]:
- return self.env.data.qpos[3:9].copy()
-
- @property
- def current_vel(self) -> Union[float, int, np.ndarray, Tuple]:
- return self.env.data.qvel[3:9].copy()