From d9f52194f759cc61d1a2cafe5dd1adb49d7d1432 Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Tue, 22 Sep 2020 17:41:25 +0200
Subject: [PATCH] reacher updates

---
 alr_envs/__init__.py                      | 23 +++++++
 alr_envs/mujoco/alr_reacher.py            | 50 +++++++++++----
 alr_envs/mujoco/assets/reacher_7links.xml | 75 +++++++++++++++++++++++
 3 files changed, 136 insertions(+), 12 deletions(-)
 create mode 100644 alr_envs/mujoco/assets/reacher_7links.xml

diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 12dfa27..14359b6 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -6,6 +6,27 @@ register(
     max_episode_steps=200,
     kwargs={
         "steps_before_reward": 0,
+        "n_links": 5,
+    }
+)
+
+register(
+    id='ALRReacherShort-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=50,
+    kwargs={
+        "steps_before_reward": 50,
+        "n_links": 5,
+    }
+)
+
+register(
+    id='ALRReacherSparse-v0',
+    entry_point='alr_envs.mujoco:ALRReacherEnv',
+    max_episode_steps=200,
+    kwargs={
+        "steps_before_reward": 200,
+        "n_links": 5,
     }
 )
 
@@ -15,6 +36,7 @@ register(
     max_episode_steps=200,
     kwargs={
         "steps_before_reward": 100,
+        "n_links": 5,
     }
 )
 
@@ -24,6 +46,7 @@ register(
     max_episode_steps=200,
     kwargs={
         "steps_before_reward": 180,
+        "n_links": 5,
     }
 )
 
diff --git a/alr_envs/mujoco/alr_reacher.py b/alr_envs/mujoco/alr_reacher.py
index 95e667e..4fd5c66 100644
--- a/alr_envs/mujoco/alr_reacher.py
+++ b/alr_envs/mujoco/alr_reacher.py
@@ -5,19 +5,42 @@ from gym.envs.mujoco import mujoco_env
 
 
 class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
-    def __init__(self):
+    def __init__(self, steps_before_reward=200, n_links=5):
+        self._steps = 0
+        self.steps_before_reward = steps_before_reward
+        self.n_links = n_links
+
+        self.reward_weight = 1 if self.steps_before_reward != 200 and self.steps_before_reward != 50 else 200
+
+        if n_links == 5:
+            file_name = 'reacher_5links.xml'
+        elif n_links == 7:
+            file_name = 'reacher_7links.xml'
+        else:
+            raise ValueError(f"Invalid number of links {n_links}, only 5 or 7 allowed.")
+
         utils.EzPickle.__init__(self)
-        mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", 'reacher_5links.xml'), 2)
+        mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", file_name), 2)
 
     def step(self, a):
-        vec = self.get_body_com("fingertip") - self.get_body_com("target")
-        reward_dist = - np.linalg.norm(vec)
+        self._steps += 1
+
+        reward_dist = 0
+        angular_vel = 0
+        if self._steps >= self.steps_before_reward:
+            vec = self.get_body_com("fingertip") - self.get_body_com("target")
+            reward_dist -= self.reward_weight * np.linalg.norm(vec)
+            angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
         reward_ctrl = - np.square(a).sum()
-        reward = reward_dist + reward_ctrl
+
+        reward = reward_dist + reward_ctrl + angular_vel
         self.do_simulation(a, self.frame_skip)
         ob = self._get_obs()
         done = False
-        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
+        return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl,
+                                      velocity=angular_vel,
+                                      end_effector=self.get_body_com("fingertip").copy(),
+                                      goal=self.goal if hasattr(self, "goal") else None)
 
     def viewer_setup(self):
         self.viewer.cam.trackbodyid = 0
@@ -25,22 +48,25 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def reset_model(self):
         qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
         while True:
-            self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
-            if np.linalg.norm(self.goal) < 0.2:
+            self.goal = self.np_random.uniform(low=-self.n_links / 10, high=self.n_links / 10, size=2)
+            if np.linalg.norm(self.goal) < self.n_links / 10:
                 break
         qpos[-2:] = self.goal
         qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
         qvel[-2:] = 0
         self.set_state(qpos, qvel)
+        self._steps = 0
         return self._get_obs()
 
     def _get_obs(self):
-        theta = self.sim.data.qpos.flat[:5]
+        theta = self.sim.data.qpos.flat[:self.n_links]
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
-            self.sim.data.qpos.flat[5:],  # this is goal position
-            self.sim.data.qvel.flat[:5],  # this is angular velocity
-            self.get_body_com("fingertip") - self.get_body_com("target")
+            self.sim.data.qpos.flat[self.n_links:],  # this is goal position
+            self.sim.data.qvel.flat[:self.n_links],  # this is angular velocity
+            self.get_body_com("fingertip") - self.get_body_com("target"),
+            # self.get_body_com("target"),  # only return target to make problem harder
+            [self._steps],
         ])
 
diff --git a/alr_envs/mujoco/assets/reacher_7links.xml b/alr_envs/mujoco/assets/reacher_7links.xml
new file mode 100644
index 0000000..6da5461
--- /dev/null
+++ b/alr_envs/mujoco/assets/reacher_7links.xml
@@ -0,0 +1,75 @@
+<!-- reacher_7links.xml: MuJoCo model definition for the 7-link reacher (75-line XML body elided) -->
\ No newline at end of file
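
Usage note: a minimal sketch of how the newly registered environments could be
driven, assuming gym and this package are importable; the environment ID is
taken from the registrations above. With steps_before_reward=200 and
max_episode_steps=200, ALRReacherSparse-v0 pays out the distance and velocity
terms only on the final step of an episode, so only the control cost is dense;
per the __init__ logic above, the distance term in the 50- and 200-step
settings is additionally scaled by reward_weight=200.

    import gym
    import alr_envs  # noqa: F401 -- importing the package runs the register() calls above

    # Sparse 5-link reacher: distance/velocity reward only from step 200 on.
    env = gym.make('ALRReacherSparse-v0')
    ob = env.reset()
    done = False
    while not done:
        # Random actions, just to exercise the env. The env itself never sets
        # done; the TimeLimit wrapper added by gym.make ends the episode after
        # max_episode_steps=200 steps.
        ob, reward, done, info = env.step(env.action_space.sample())
    env.close()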