updates in biac

2021-02-11 16:19:57 +01:00 · 2021-02-11 16:19:57 +01:00 · 708478c626
commit 708478c626
parent 13a292f0e0
4 changed files with 162 additions and 10 deletions
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
@ -2,27 +2,44 @@ from gym import utils
 import os
 import numpy as np
 from alr_envs.mujoco import alr_mujoco_env
-from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
+from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
 import mujoco_py


 class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
-    def __init__(self, ):
+    def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
        self._steps = 0

        self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
-                                     "ball-in-a-cup_base" + ".xml")
-
-        self.sim_time = 8  # seconds
-        self.sim_steps = int(self.sim_time / (0.0005 * 4))  # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct
-        self.reward_function = BallInACupReward(self.sim_steps)
+                                     "biac_base" + ".xml")

        self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
+        self.start_vel = np.zeros(7)
+
        self._q_pos = []
+        self._q_vel = []
+        # self.weight_matrix_scale = 50
+        self.max_ctrl = np.array([150., 125., 40., 60., 5., 5., 2.])
+        self.p_gains = 1 / self.max_ctrl * np.array([200, 300, 100, 100, 10, 10, 2.5])
+        self.d_gains = 1 / self.max_ctrl * np.array([7, 15, 5, 2.5, 0.3, 0.3, 0.05])
+
+        self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
+        self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
+
+        self.context = None

        utils.EzPickle.__init__(self)
-        alr_mujoco_env.AlrMujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
-                                      n_substeps=4)
+        alr_mujoco_env.AlrMujocoEnv.__init__(self,
+                                             self.xml_path,
+                                             apply_gravity_comp=apply_gravity_comp,
+                                             n_substeps=n_substeps)
+
+        self.sim_time = 8  # seconds
+        self.sim_steps = int(self.sim_time / self.dt)
+        if reward_function is None:
+            from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
+            reward_function = BallInACupReward
+        self.reward_function = reward_function(self.sim_steps)

    def configure(self, context):
        self.context = context
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
@ -0,0 +1,95 @@
+import numpy as np
+from alr_envs.mujoco import alr_reward_fct
+
+
+class BallInACupReward(alr_reward_fct.AlrReward):
+    def __init__(self, sim_time):
+        self.sim_time = sim_time
+
+        self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
+                                  "wrist_pitch_link_convex_decomposition_p1_geom",
+                                  "wrist_pitch_link_convex_decomposition_p2_geom",
+                                  "wrist_pitch_link_convex_decomposition_p3_geom",
+                                  "wrist_yaw_link_convex_decomposition_p1_geom",
+                                  "wrist_yaw_link_convex_decomposition_p2_geom",
+                                  "forearm_link_convex_decomposition_p1_geom",
+                                  "forearm_link_convex_decomposition_p2_geom"]
+
+        self.ball_id = None
+        self.ball_collision_id = None
+        self.goal_id = None
+        self.goal_final_id = None
+        self.collision_ids = None
+
+        self.ball_traj = None
+        self.dists = None
+        self.dists_final = None
+        self.costs = None
+
+        self.reset(None)
+
+    def reset(self, context):
+        self.ball_traj = np.zeros(shape=(self.sim_time, 3))
+        self.dists = []
+        self.dists_final = []
+        self.costs = []
+        self.context = context
+
+    def compute_reward(self, action, sim, step, context=None):
+        self.ball_id = sim.model._body_name2id["ball"]
+        self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
+        self.goal_id = sim.model._site_name2id["cup_goal"]
+        self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
+        self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
+
+        ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
+
+        # Compute the current distance from the ball to the inner part of the cup
+        goal_pos = sim.data.site_xpos[self.goal_id]
+        ball_pos = sim.data.body_xpos[self.ball_id]
+        goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+        self.dists.append(np.linalg.norm(goal_pos - ball_pos))
+        self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
+        self.ball_traj[step, :] = ball_pos
+
+        action_cost = np.sum(np.square(action))
+
+        if self.check_collision(sim):
+            reward = - 1e-5 * action_cost - 1000
+            return reward, False, True
+
+        if step == self.sim_time - 1:
+            min_dist = np.min(self.dists)
+            dist_final = self.dists_final[-1]
+
+            cost = 0.5 * min_dist + 0.5 * dist_final
+            reward = np.exp(-2 * cost) - 1e-5 * action_cost
+            success = dist_final < 0.05 and ball_in_cup
+        else:
+            reward = - 1e-5 * action_cost
+            success = False
+
+        return reward, success, False
+
+    def check_ball_in_cup(self, sim, ball_collision_id):
+        cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 == cup_base_collision_id and con.geom2 == ball_collision_id
+            collision_trans = con.geom1 == ball_collision_id and con.geom2 == cup_base_collision_id
+
+            if collision or collision_trans:
+                return True
+        return False
+
+    def check_collision(self, sim):
+        for coni in range(0, sim.data.ncon):
+            con = sim.data.contact[coni]
+
+            collision = con.geom1 in self.collision_ids and con.geom2 == self.ball_collision_id
+            collision_trans = con.geom1 == self.ball_collision_id and con.geom2 in self.collision_ids
+
+            if collision or collision_trans:
+                return True
+        return False
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_simple.py
@ -93,7 +93,8 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
                                      velocity=angular_vel, # reward_balance=reward_balance,
                                      # end_effector=self.get_body_com("fingertip").copy(),
                                      goal=self.goal if hasattr(self, "goal") else None,
-                                      traj=self._q_pos)
+                                      traj=self._q_pos,
+                                      is_collided=crash or joint_cons_viol)

    def check_traj_in_joint_limits(self):
        return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
--- a/alr_envs/mujoco/ball_in_a_cup/utils.py
+++ b/alr_envs/mujoco/ball_in_a_cup/utils.py
@ -0,0 +1,39 @@
+from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
+from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
+from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv as ALRBallInACupEnvSimple
+
+
+# TODO: add make_env for standard biac
+
+
+def make_simple_env(rank, seed=0):
+    """
+    Utility function for multiprocessed env.
+
+    :param rank: (int) index of the subprocess; it is added to ``seed`` so that
+        each subprocess seeds its environment differently
+    :param seed: (int) the initial seed for RNG
+    :returns: a function that creates the DMP-wrapped simple ball-in-a-cup
+        environment, seeded with ``seed + rank``
+    """
+
+    def _init():
+        env = ALRBallInACupEnvSimple()
+
+        env = DmpEnvWrapper(env,
+                            policy_type="motor",
+                            start_pos=env.start_pos[1::2],
+                            final_pos=env.start_pos[1::2],
+                            num_dof=3,
+                            num_basis=8,
+                            duration=3.5,
+                            alpha_phase=3,
+                            post_traj_time=4.5,
+                            dt=env.dt,
+                            learn_goal=False,
+                            weights_scale=50)
+
+        env.seed(seed + rank)
+        return env
+
+    return _init