diff --git a/README.md b/README.md
index 208edbe..e9f88a1 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Currently we have the following environements:
 |`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. |
 
-## INSTALL
+## Install
 1. Clone the repository
 ```bash
 git clone git@github.com:ALRhub/alr_envs.git
diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index 2193db9..62e5db0 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -64,7 +64,7 @@ register(
 )
 
 register(
-    id='ALRReacherSparse-v0',
+    id='ALRReacher7Sparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
     max_episode_steps=200,
     kwargs={
diff --git a/alr_envs/mujoco/alr_reacher.py b/alr_envs/mujoco/alr_reacher.py
index 7ae28da..0c65e7f 100644
--- a/alr_envs/mujoco/alr_reacher.py
+++ b/alr_envs/mujoco/alr_reacher.py
@@ -37,13 +37,16 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
 
         reward_dist = 0.0
         angular_vel = 0.0
+        reward_balance = 0.0
         if self._steps >= self.steps_before_reward:
             vec = self.get_body_com("fingertip") - self.get_body_com("target")
             reward_dist -= self.reward_weight * np.linalg.norm(vec)
             angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
         reward_ctrl = - np.square(a).sum()
-        reward_balance = - self.balance_weight * np.abs(
-            angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
+
+        if self.balance:
+            reward_balance = - self.balance_weight * np.abs(
+                angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
 
         reward = reward_dist + reward_ctrl + angular_vel + reward_balance
         self.do_simulation(a, self.frame_skip)
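
For reference, a minimal usage sketch of the environment after the rename, assuming `alr_envs` registers its environments on import via the `register()` calls shown above and that the classic `gym` step API applies (as `mujoco_env.MujocoEnv` suggests). This is illustrative, not part of the diff:

```python
# Sketch only: exercising the renamed environment id with a random policy.
import gym
import alr_envs  # noqa: F401  -- importing runs the register() calls in alr_envs/__init__.py

# 'ALRReacherSparse-v0' was renamed to 'ALRReacher7Sparse-v0' in this change.
env = gym.make('ALRReacher7Sparse-v0')

obs = env.reset()
for _ in range(200):  # matches max_episode_steps=200 in the registration above
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()
```

With the reward change above, the balance penalty is only added when the environment's `balance` flag is set; otherwise `reward_balance` stays at its new default of `0.0` instead of always being computed.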