From cbdd5d1854d699175a0a66684702b4252102f2a7 Mon Sep 17 00:00:00 2001
From: ottofabian
Date: Tue, 8 Sep 2020 12:43:14 +0200
Subject: [PATCH] removed action scaling

---
 alr_envs/classic_control/simple_reacher.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py
index 9c9aeea..4d9d930 100644
--- a/alr_envs/classic_control/simple_reacher.py
+++ b/alr_envs/classic_control/simple_reacher.py
@@ -31,7 +31,7 @@ class SimpleReacherEnv(gym.Env):
         self._angle_velocity = None
 
         self.max_torque = 1  # 10
-        self.steps_before_reward = 50
+        self.steps_before_reward = 100
 
         action_bound = np.ones((self.n_links,))
         state_bound = np.hstack([
@@ -50,9 +50,10 @@ class SimpleReacherEnv(gym.Env):
         self._steps = 0
         self.seed()
 
-    def step(self, action):
+    def step(self, action: np.ndarray):
 
-        action = self._scale_action(action)
+        # action = self._add_action_noise(action)
+        action = np.clip(action, -self.max_torque, self.max_torque)
 
         self._angle_velocity = self._angle_velocity + self.dt * action
         self._joint_angle = angle_normalize(self._joint_angle + self.dt * self._angle_velocity)
@@ -66,22 +67,16 @@
 
         return self._get_obs().copy(), reward, done, info
 
-    def _scale_action(self, action):
+    def _add_action_noise(self, action: np.ndarray):
         """
-        scale actions back in order to provide normalized actions \in [0,1]
-
+        add unobserved Gaussian noise N(0, 0.1) to the actions
         Args:
-            action: action to scale
+            action: action to add noise to
 
-        Returns: action according to self.max_torque
+        Returns: actions with noise
 
         """
-
-        ub = self.max_torque
-        lb = -self.max_torque
-
-        action = lb + (action + 1.) * 0.5 * (ub - lb)
-        return np.clip(action, lb, ub)
+        return self.np_random.normal(0, 0.1, *action.shape) + action
 
     def _get_obs(self):
         theta = self._joint_angle
@@ -103,7 +98,7 @@
         x = self.link_lengths * np.vstack([np.cos(angles), np.sin(angles)])
         self._joints[1:] = self._joints[0] + np.cumsum(x.T, axis=0)
 
-    def _get_reward(self, action):
+    def _get_reward(self, action: np.ndarray):
         diff = self.end_effector - self._goal_pos
         reward_dist = 0
 
@@ -152,6 +147,7 @@
         plt.figure(self.fig.number)
 
         plt.cla()
+        plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self._goal_pos}")
 
         # Arm
         plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
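
A minimal sketch (editor's illustration, not part of the patch) of what the change in step() means for callers: the removed _scale_action mapped a normalized action from [-1, 1] onto [-max_torque, max_torque] before clipping, while the new code clips the raw action directly. With the current max_torque = 1 the two are identical; for any other bound the policy now has to emit torques on the environment's scale itself. The helper names below are hypothetical.

# Editor's illustrative sketch, not part of the commit. Helper names are hypothetical.
import numpy as np

def old_scale_action(action, max_torque):
    # removed behaviour: map a normalized action in [-1, 1] onto
    # [-max_torque, max_torque], then clip to that range
    ub, lb = max_torque, -max_torque
    action = lb + (action + 1.) * 0.5 * (ub - lb)
    return np.clip(action, lb, ub)

def new_clip_action(action, max_torque):
    # new behaviour: treat the action as a torque and clip it directly
    return np.clip(action, -max_torque, max_torque)

a = np.array([-1.5, -0.3, 0.7, 2.0])
print(old_scale_action(a, max_torque=1))   # [-1.  -0.3  0.7  1. ]
print(new_clip_action(a, max_torque=1))    # identical for max_torque = 1
print(old_scale_action(a, max_torque=10))  # [-10.  -3.   7.  10.]  (rescaled)
print(new_clip_action(a, max_torque=10))   # [-1.5 -0.3  0.7  2. ]  (no rescaling)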