corrected reward for hopperjumprndminit + ALRReacher for iLQR
This commit is contained in:
parent
77927e9157
commit
c0502cf1d4
@ -148,6 +148,17 @@ register(
|
||||
}
|
||||
)
|
||||
|
||||
# Sparse-reward ALRReacher variant intended for optimal control (iLQR):
# steps_before_reward equals max_episode_steps, so the distance reward is
# presumably only granted at the final step — TODO confirm against
# ALRReacherOptCtrlEnv's reward implementation.
register(
    id='ALRReacherSparseOptCtrl-v0',
    entry_point='alr_envs.alr.mujoco:ALRReacherOptCtrlEnv',
    max_episode_steps=200,
    kwargs={
        "steps_before_reward": 200,
        "n_links": 5,
        "balance": False,
    }
)
|
||||
|
||||
register(
|
||||
id='ALRReacherSparseBalanced-v0',
|
||||
entry_point='alr_envs.alr.mujoco:ALRReacherEnv',
|
||||
|
@ -1,4 +1,4 @@
|
||||
from .reacher.alr_reacher import ALRReacherEnv
|
||||
from .reacher.alr_reacher import ALRReacherEnv, ALRReacherOptCtrlEnv
|
||||
from .reacher.balancing import BalancingEnv
|
||||
from .ball_in_a_cup.ball_in_a_cup import ALRBallInACupEnv
|
||||
from .ball_in_a_cup.biac_pd import ALRBallInACupPDEnv
|
||||
|
@ -94,10 +94,16 @@ class ALRHopperJumpEnv(HopperEnv):
|
||||
|
||||
class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
|
||||
    def __init__(self, max_episode_steps=250):
        """Hopper jump task with a randomized initial configuration.

        :param max_episode_steps: episode length forwarded to the base env.
        """
        # Whether the foot has touched the floor this episode; once True it
        # stays True (see the step() logic of this class).
        self.contact_with_floor = False
        # Geom ids are resolved lazily in reset_model() via geom_name2id,
        # because the MuJoCo model is only available after the base __init__.
        self._floor_geom_id = None
        self._foot_geom_id = None
        # Large reset noise (5e-1) produces the random initial position; the
        # current positions are kept in the observation so the policy sees
        # where it starts.
        super(ALRHopperJumpRndmPosEnv, self).__init__(exclude_current_positions_from_observation=False,
                                                      reset_noise_scale=5e-1,
                                                      max_episode_steps=max_episode_steps)
|
||||
|
||||
def reset_model(self):
|
||||
self._floor_geom_id = self.model.geom_name2id('floor')
|
||||
self._foot_geom_id = self.model.geom_name2id('foot_geom')
|
||||
noise_low = -self._reset_noise_scale
|
||||
noise_high = self._reset_noise_scale
|
||||
rnd_vec = self.np_random.uniform(low=noise_low, high=noise_high, size=self.model.nq)
|
||||
@ -116,8 +122,12 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
|
||||
|
||||
self.current_step += 1
|
||||
self.do_simulation(action, self.frame_skip)
|
||||
|
||||
self.contact_with_floor = self._contact_checker(self._floor_geom_id, self._foot_geom_id) if not \
|
||||
self.contact_with_floor else True
|
||||
|
||||
height_after = self.get_body_com("torso")[2]
|
||||
self.max_height = max(height_after, self.max_height)
|
||||
self.max_height = max(height_after, self.max_height) if self.contact_with_floor else 0
|
||||
|
||||
ctrl_cost = self.control_cost(action)
|
||||
costs = ctrl_cost
|
||||
@ -142,9 +152,19 @@ class ALRHopperJumpRndmPosEnv(ALRHopperJumpEnv):
|
||||
|
||||
return observation, reward, done, info
|
||||
|
||||
def _contact_checker(self, id_1, id_2):
|
||||
for coni in range(0, self.sim.data.ncon):
|
||||
con = self.sim.data.contact[coni]
|
||||
collision = con.geom1 == id_1 and con.geom2 == id_2
|
||||
collision_trans = con.geom1 == id_2 and con.geom2 == id_1
|
||||
if collision or collision_trans:
|
||||
return True
|
||||
return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
render_mode = "human" # "human" or "partial" or "final"
|
||||
env = ALRHopperJumpEnv()
|
||||
# env = ALRHopperJumpEnv()
|
||||
env = ALRHopperJumpRndmPosEnv()
|
||||
obs = env.reset()
|
||||
|
||||
for i in range(2000):
|
||||
@ -152,8 +172,9 @@ if __name__ == '__main__':
|
||||
# test with random actions
|
||||
ac = env.action_space.sample()
|
||||
obs, rew, d, info = env.step(ac)
|
||||
if i % 10 == 0:
|
||||
env.render(mode=render_mode)
|
||||
# if i % 10 == 0:
|
||||
# env.render(mode=render_mode)
|
||||
env.render(mode=render_mode)
|
||||
if d:
|
||||
print('After ', i, ' steps, done: ', d)
|
||||
env.reset()
|
||||
|
@ -87,6 +87,27 @@ class ALRReacherEnv(MujocoEnv, utils.EzPickle):
|
||||
[self._steps],
|
||||
])
|
||||
|
||||
class ALRReacherOptCtrlEnv(ALRReacherEnv):
    """ALRReacher variant tailored for optimal control (iLQR).

    Differences from the base env: the goal is pinned to a fixed position,
    the reset is deterministic (no noise), and the observation is reduced to
    joint angles and angular velocities only.
    """

    def __init__(self, steps_before_reward=200, n_links=5, balance=False):
        super(ALRReacherOptCtrlEnv, self).__init__(steps_before_reward, n_links, balance)
        # Fixed target position, written into the last two qpos entries on
        # every reset.
        self.goal = np.array([0.1, 0.1])

    def _get_obs(self):
        # Only joint state is exposed; the goal is fixed and known to the
        # controller, so it is omitted from the observation.
        theta = self.sim.data.qpos.flat[:self.n_links]
        return np.concatenate([
            theta,
            self.sim.data.qvel.flat[:self.n_links],  # angular velocities
        ])

    def reset_model(self):
        # Copy before writing: the original assigned into self.init_qpos /
        # self.init_qvel in place, silently mutating the cached initial state
        # shared with the base environment on every reset.
        qpos = self.init_qpos.copy()
        qpos[-2:] = self.goal
        qvel = self.init_qvel.copy()
        qvel[-2:] = 0
        self.set_state(qpos, qvel)
        self._steps = 0

        return self._get_obs()
|
||||
|
||||
if __name__ == '__main__':
|
||||
nl = 5
|
||||
|
Loading…
Reference in New Issue
Block a user