From 708478c62676c64baa8edc78fe7570b3afbce6d6 Mon Sep 17 00:00:00 2001 From: Maximilian Huettenrauch Date: Thu, 11 Feb 2021 16:19:57 +0100 Subject: [PATCH] updates in biac --- .../mujoco/ball_in_a_cup/ball_in_a_cup.py | 35 +++++-- .../ball_in_a_cup/ball_in_a_cup_reward.py | 95 +++++++++++++++++++ .../ball_in_a_cup/ball_in_a_cup_simple.py | 3 +- alr_envs/mujoco/ball_in_a_cup/utils.py | 39 ++++++++ 4 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py create mode 100644 alr_envs/mujoco/ball_in_a_cup/utils.py diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py index ad2e52a..646c433 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py @@ -2,27 +2,44 @@ from gym import utils import os import numpy as np from alr_envs.mujoco import alr_mujoco_env -from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward +from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward import mujoco_py class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): - def __init__(self, ): + def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None): self._steps = 0 self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", - "ball-in-a-cup_base" + ".xml") - - self.sim_time = 8 # seconds - self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. 
sim.dt <-> mujocoenv init <-> reward fct - self.reward_function = BallInACupReward(self.sim_steps) + "biac_base" + ".xml") self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57]) + self.start_vel = np.zeros(7) + self._q_pos = [] + self._q_vel = [] + # self.weight_matrix_scale = 50 + self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.]) + self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5]) + self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05]) + + self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7]) + self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7]) + + self.context = None utils.EzPickle.__init__(self) - alr_mujoco_env.AlrMujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"), - n_substeps=4) + alr_mujoco_env.AlrMujocoEnv.__init__(self, + self.xml_path, + apply_gravity_comp=apply_gravity_comp, + n_substeps=n_substeps) + + self.sim_time = 8 # seconds + self.sim_steps = int(self.sim_time / self.dt) + if reward_function is None: + from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward + reward_function = BallInACupReward + self.reward_function = reward_function(self.sim_steps) def configure(self, context): self.context = context diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py new file mode 100644 index 0000000..76a4ec6 --- /dev/null +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py @@ -0,0 +1,95 @@ +import numpy as np +from alr_envs.mujoco import alr_reward_fct + + +class BallInACupReward(alr_reward_fct.AlrReward): + def __init__(self, sim_time): + self.sim_time = sim_time + + self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom", + "wrist_pitch_link_convex_decomposition_p1_geom", + "wrist_pitch_link_convex_decomposition_p2_geom", + 
"wrist_pitch_link_convex_decomposition_p3_geom", + "wrist_yaw_link_convex_decomposition_p1_geom", + "wrist_yaw_link_convex_decomposition_p2_geom", + "forearm_link_convex_decomposition_p1_geom", + "forearm_link_convex_decomposition_p2_geom"] + + self.ball_id = None + self.ball_collision_id = None + self.goal_id = None + self.goal_final_id = None + self.collision_ids = None + + self.ball_traj = None + self.dists = None + self.dists_final = None + self.costs = None + + self.reset(None) + + def reset(self, context): + self.ball_traj = np.zeros(shape=(self.sim_time, 3)) + self.dists = [] + self.dists_final = [] + self.costs = [] + self.context = context + + def compute_reward(self, action, sim, step, context=None): + self.ball_id = sim.model._body_name2id["ball"] + self.ball_collision_id = sim.model._geom_name2id["ball_geom"] + self.goal_id = sim.model._site_name2id["cup_goal"] + self.goal_final_id = sim.model._site_name2id["cup_goal_final"] + self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects] + + ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id) + + # Compute the current distance from the ball to the inner part of the cup + goal_pos = sim.data.site_xpos[self.goal_id] + ball_pos = sim.data.body_xpos[self.ball_id] + goal_final_pos = sim.data.site_xpos[self.goal_final_id] + self.dists.append(np.linalg.norm(goal_pos - ball_pos)) + self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos)) + self.ball_traj[step, :] = ball_pos + + action_cost = np.sum(np.square(action)) + + if self.check_collision(sim): + reward = - 1e-5 * action_cost - 1000 + return reward, False, True + + if step == self.sim_time - 1: + min_dist = np.min(self.dists) + dist_final = self.dists_final[-1] + + cost = 0.5 * min_dist + 0.5 * dist_final + reward = np.exp(-2 * cost) - 1e-5 * action_cost + success = dist_final < 0.05 and ball_in_cup + else: + reward = - 1e-5 * action_cost + success = False + + return reward, success, False + + def 
check_ball_in_cup(self, sim, ball_collision_id): + cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"] + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id + collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id + + if collision or collision_trans: + return True + return False + + def check_collision(self, sim): + for coni in range(0, sim.data.ncon): + con = sim.data.contact[coni] + + collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id + collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids + + if collision or collision_trans: + return True + return False diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py index efc702c..ab7e332 100644 --- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py +++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py @@ -93,7 +93,8 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle): velocity=angular_vel, # reward_balance=reward_balance, # end_effector=self.get_body_com("fingertip").copy(), goal=self.goal if hasattr(self, "goal") else None, - traj=self._q_pos) + traj=self._q_pos, + is_collided=crash or joint_cons_viol) def check_traj_in_joint_limits(self): return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min) diff --git a/alr_envs/mujoco/ball_in_a_cup/utils.py b/alr_envs/mujoco/ball_in_a_cup/utils.py new file mode 100644 index 0000000..a4295da --- /dev/null +++ b/alr_envs/mujoco/ball_in_a_cup/utils.py @@ -0,0 +1,39 @@ +from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper +from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv +from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv as ALRBallInACupEnvSimple + + +# TODO: add make_env for standard biac + 
+ +def make_simple_env(rank, seed=0): + """ + Utility function for multiprocessed env. + + The env is an ALRBallInACupEnvSimple wrapped in a DmpEnvWrapper. + + :param rank: (int) index of the subprocess; added to seed when seeding the env + :param seed: (int) the initial seed for RNG + :returns: a function that generates an environment + """ + + def _init(): + env = ALRBallInACupEnvSimple() + + env = DmpEnvWrapper(env, + policy_type="motor", + start_pos=env.start_pos[1::2], + final_pos=env.start_pos[1::2], + num_dof=3, + num_basis=8, + duration=3.5, + alpha_phase=3, + post_traj_time=4.5, + dt=env.dt, + learn_goal=False, + weights_scale=50) + + env.seed(seed + rank) + return env + + return _init