refactoring of projection layer, improved modularization of code

ottofabian 2020-12-11 09:46:35 +01:00
parent 768ae14655
commit b8f0c91a90
3 changed files with 7 additions and 4 deletions


@@ -31,7 +31,7 @@ Currently we have the following environements:
 |`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. |
-## INSTALL
+## Install
 1. Clone the repository
 ```bash
 git clone git@github.com:ALRhub/alr_envs.git

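For reference, the ids in the README table above are standard Gym ids, so once the repository is cloned and installed they can be created with `gym.make`. A minimal sketch, assuming that importing the package executes the `register()` calls (as in the diff below):

```python
import gym
import alr_envs  # assumption: importing the package runs the register() calls

# id taken from the README table above; {dim} is one of 5, 10, 25, 50 or 100
env = gym.make("Rosenbrock10-v0")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
```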

@@ -64,7 +64,7 @@ register(
 )
 register(
-    id='ALRReacherSparse-v0',
+    id='ALRReacher7Sparse-v0',
     entry_point='alr_envs.mujoco:ALRReacherEnv',
     max_episode_steps=200,
     kwargs={

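The only change here is the id under which the sparse reacher is registered, so downstream code has to create the environment under the new name. A hedged sketch, assuming nothing else re-registers the old id:

```python
import gym
import alr_envs  # assumption: import runs the register() calls shown above

env = gym.make("ALRReacher7Sparse-v0")  # new id after this commit
# gym.make("ALRReacherSparse-v0") would now raise gym.error.UnregisteredEnv
```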

@@ -37,11 +37,14 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         reward_dist = 0.0
         angular_vel = 0.0
+        reward_balance = 0.0
         if self._steps >= self.steps_before_reward:
             vec = self.get_body_com("fingertip") - self.get_body_com("target")
             reward_dist -= self.reward_weight * np.linalg.norm(vec)
             angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
         reward_ctrl = - np.square(a).sum()
-        reward_balance = - self.balance_weight * np.abs(
-            angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
+        if self.balance:
+            reward_balance = - self.balance_weight * np.abs(
+                angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
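The effect of this hunk: the balance penalty is now computed only when the environment was created with balance enabled, and initializing `reward_balance = 0.0` keeps the total reward well-defined when it is off. A minimal self-contained sketch of the gated term, with `angle_normalize` and the weight filled in as assumptions (the real implementation lives in `ALRReacherEnv`):

```python
import numpy as np

def angle_normalize(x, type="rad"):
    # assumption: wraps an angle to [-pi, pi) (or [-180, 180) for degrees);
    # the actual helper lives in alr_envs
    two_pi = 2 * np.pi if type == "rad" else 360
    return ((x + two_pi / 2) % two_pi) - two_pi / 2

def balance_penalty(qpos, n_links, balance, balance_weight=1.0):
    # mirrors the gated term above: zero unless balance is enabled
    reward_balance = 0.0
    if balance:
        # penalize the absolute deviation of the summed joint angles
        # (the end-effector orientation of a planar reacher) from zero
        reward_balance = -balance_weight * np.abs(
            angle_normalize(np.sum(qpos[:n_links]), type="rad"))
    return reward_balance

# hypothetical usage with a 5-link arm
qpos = np.array([0.1, -0.2, 0.3, 0.0, 0.05])
print(balance_penalty(qpos, n_links=5, balance=True))   # -0.25
print(balance_penalty(qpos, n_links=5, balance=False))  # 0.0
```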