Maximilian Huettenrauch 2021-02-17 17:48:05 +01:00
parent 420fe10506
commit 46fc642c36
6 changed files with 23 additions and 26 deletions

View File

@@ -35,7 +35,6 @@ class HoleReacher(gym.Env):
         self._angle_velocity = None
         self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
         self.start_vel = np.zeros(self.num_links)
-        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer

         self.dt = 0.01
         self.time_limit = 2

@@ -68,6 +67,9 @@ class HoleReacher(gym.Env):
     def end_effector(self):
         return self._joints[self.num_links].T

+    def configure(self, context):
+        pass
+
     def reset(self):
         self._joint_angles = self.start_pos
         self._angle_velocity = self.start_vel

View File

@@ -62,7 +62,8 @@ def make_holereacher_env(rank, seed=0):
                             learn_goal=True,
                             alpha_phase=2,
                             start_pos=_env.start_pos,
-                            policy_type="velocity"
+                            policy_type="velocity",
+                            weights_scale=100,
                             )
        _env.seed(seed + rank)
        return _env

View File

@@ -71,10 +71,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             dist_ctxt = self.dists_ctxt[-1]

             # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.33 * min_dist + 0.33 * dist_final + 0.33 * dist_ctxt)
+            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)
             reward = np.exp(-1 * cost) - 1e-4 * action_cost
             stop_sim = True
-            success = dist_final < 0.05 and ball_in_cup
+            success = dist_final < 0.05 and dist_ctxt < 0.05
         else:
             reward = - 1e-4 * action_cost
             success = False
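The re-weighted cost (0.5/0.5/0.1 instead of a uniform 0.33 each) emphasizes the minimum and final ball distances over the context distance, and success now checks dist_ctxt < 0.05 instead of the ball_in_cup flag. A worked example of the new terms with made-up distances (not from an actual rollout):

import numpy as np

# Illustrative inputs, not simulator output.
min_dist, dist_final, dist_ctxt, action_cost = 0.2, 0.1, 0.04, 1.0

cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)  # = 0.308
reward = np.exp(-1 * cost) - 1e-4 * action_cost                   # ~ 0.7348
success = dist_final < 0.05 and dist_ctxt < 0.05                  # False here

print(cost, reward, success)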

View File

@@ -86,8 +86,6 @@ class DmpEnvWrapper(gym.Wrapper):
     def rollout(self, params, context=None, render=False):
         """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""

         goal_pos, weight_matrix = self.goal_and_weights(params)
-        if hasattr(self.env, "weight_matrix_scale"):
-            weight_matrix = weight_matrix * self.env.weight_matrix_scale
         self.dmp.set_weights(weight_matrix, goal_pos)
         trajectory, velocity = self.dmp.reference_trajectory(self.t)
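With the hasattr branch gone, rollout no longer reaches into the wrapped environment for a scale factor; the scaling presumably happens once inside the wrapper, driven by the weights_scale argument seen in make_holereacher_env. A hypothetical sketch of that arrangement (only weights_scale comes from this commit; every other name below is illustrative):

import numpy as np

class DmpWrapperSketch:
    """Hypothetical stand-in for DmpEnvWrapper's scaling logic."""

    def __init__(self, num_goal_params=5, weights_scale=1.0):
        self.num_goal_params = num_goal_params  # illustrative
        self.weights_scale = weights_scale      # from make_holereacher_env (100)

    def goal_and_weights(self, params):
        # Split the flat parameter vector into goal and basis weights,
        # scaling the weights once here instead of in rollout().
        goal_pos = params[-self.num_goal_params:]
        weight_matrix = params[:-self.num_goal_params] * self.weights_scale
        return goal_pos, weight_matrix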

View File

@@ -5,17 +5,19 @@ import numpy as np

 if __name__ == "__main__":
-    n_samples = 10
+    n_samples = 1
     n_cpus = 4
     dim = 25

-    env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
-                            n_samples=n_samples)
+    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
+    #                         n_samples=n_samples)
+
+    test_env = make_viapointreacher_env(0)()

     params = np.random.randn(n_samples, dim)
     # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

-    # env.reset()
-    out = env(params)
-    print(out)
+    test_env.rollout(params, render=True)
+
+    # out = env(params)
+    # print(out)

View File

@@ -5,29 +5,23 @@ import numpy as np

 if __name__ == "__main__":
-    dim = 24
+    dim = 15
     n_cpus = 4
-    n_samples = 10
+    n_samples = 1

     vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
                                 n_samples=n_samples)

-    # params = 10 * np.random.randn(n_samples, dim)
-    params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
-                         -8.57305795,  2.79806606, -6.38613201,   5.99309385,
-                         -2.05631886, 24.71684748, 14.05989949, -14.60456967,
-                         10.51933419, -2.43715355, -6.0767578 ,  13.06498129,
-                          6.18038374, 11.4153859 ,  1.40753639,   5.57082387,
-                          9.81989309,  3.60558787, -9.66996754,  14.28519904]])
+    params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))

-    out = vec_env(params)
-    print(out)
+    rewards, infos = vec_env(params)
+    print(rewards)

     #
-    non_vec_env = make_simple_env(0, 0)()
+    # non_vec_env = make_simple_env(0, 0)()
     #
     # params = 10 * np.random.randn(dim)
-    out2 = non_vec_env.rollout(params, render=True)
-    print(out2)
+    # out2 = non_vec_env.rollout(params, render=True)
+    # print(out2)