refactoring of projection layer, improved modularization of code
This commit is contained in:
parent
768ae14655
commit
b8f0c91a90
@ -31,7 +31,7 @@ Currently we have the following environments:
|
|||||||
|`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. |
|
|`Rosenbrock{dim}-v0`| Gym interface for Rosenbrock function. `{dim}` is one of 5, 10, 25, 50 or 100. |
|
||||||
|
|
||||||
|
|
||||||
## INSTALL
|
## Install
|
||||||
1. Clone the repository
|
1. Clone the repository
|
||||||
```bash
|
```bash
|
||||||
git clone git@github.com:ALRhub/alr_envs.git
|
git clone git@github.com:ALRhub/alr_envs.git
|
||||||
|
@ -64,7 +64,7 @@ register(
|
|||||||
)
|
)
|
||||||
|
|
||||||
register(
|
register(
|
||||||
id='ALRReacherSparse-v0',
|
id='ALRReacher7Sparse-v0',
|
||||||
entry_point='alr_envs.mujoco:ALRReacherEnv',
|
entry_point='alr_envs.mujoco:ALRReacherEnv',
|
||||||
max_episode_steps=200,
|
max_episode_steps=200,
|
||||||
kwargs={
|
kwargs={
|
||||||
|
@ -37,11 +37,14 @@ class ALRReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
|||||||
|
|
||||||
reward_dist = 0.0
|
reward_dist = 0.0
|
||||||
angular_vel = 0.0
|
angular_vel = 0.0
|
||||||
|
reward_balance = 0.0
|
||||||
if self._steps >= self.steps_before_reward:
|
if self._steps >= self.steps_before_reward:
|
||||||
vec = self.get_body_com("fingertip") - self.get_body_com("target")
|
vec = self.get_body_com("fingertip") - self.get_body_com("target")
|
||||||
reward_dist -= self.reward_weight * np.linalg.norm(vec)
|
reward_dist -= self.reward_weight * np.linalg.norm(vec)
|
||||||
angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
|
angular_vel -= np.linalg.norm(self.sim.data.qvel.flat[:self.n_links])
|
||||||
reward_ctrl = - np.square(a).sum()
|
reward_ctrl = - np.square(a).sum()
|
||||||
|
|
||||||
|
if self.balance:
|
||||||
reward_balance = - self.balance_weight * np.abs(
|
reward_balance = - self.balance_weight * np.abs(
|
||||||
angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
|
angle_normalize(np.sum(self.sim.data.qpos.flat[:self.n_links]), type="rad"))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user