diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py
index eeba84d..be1cd1f 100644
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@@ -35,7 +35,6 @@ class HoleReacher(gym.Env):
         self._angle_velocity = None
         self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
         self.start_vel = np.zeros(self.num_links)
-        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
 
         self.dt = 0.01
         self.time_limit = 2
@@ -68,6 +67,9 @@ class HoleReacher(gym.Env):
     def end_effector(self):
         return self._joints[self.num_links].T
 
+    def configure(self, context):
+        pass
+
     def reset(self):
         self._joint_angles = self.start_pos
         self._angle_velocity = self.start_vel
diff --git a/alr_envs/classic_control/utils.py b/alr_envs/classic_control/utils.py
index f276d4a..9da138f 100644
--- a/alr_envs/classic_control/utils.py
+++ b/alr_envs/classic_control/utils.py
@@ -62,7 +62,8 @@ def make_holereacher_env(rank, seed=0):
                                  learn_goal=True,
                                  alpha_phase=2,
                                  start_pos=_env.start_pos,
-                                 policy_type="velocity"
+                                 policy_type="velocity",
+                                 weights_scale=100,
                                  )
         _env.seed(seed + rank)
         return _env
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
index 7c68e35..73dc1c5 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward.py
@@ -71,10 +71,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             dist_ctxt = self.dists_ctxt[-1]
 
             # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.33 * min_dist + 0.33 * dist_final + 0.33 * dist_ctxt)
+            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)
             reward = np.exp(-1 * cost) - 1e-4 * action_cost
             stop_sim = True
-            success = dist_final < 0.05 and ball_in_cup
+            success = dist_final < 0.05 and dist_ctxt < 0.05
         else:
             reward = - 1e-4 * action_cost
             success = False
diff --git a/alr_envs/utils/dmp_env_wrapper.py b/alr_envs/utils/dmp_env_wrapper.py
index 43bb030..849ac1b 100644
--- a/alr_envs/utils/dmp_env_wrapper.py
+++ b/alr_envs/utils/dmp_env_wrapper.py
@@ -86,8 +86,6 @@ class DmpEnvWrapper(gym.Wrapper):
 
     def rollout(self, params, context=None, render=False):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
        goal_pos, weight_matrix = self.goal_and_weights(params)
-        if hasattr(self.env, "weight_matrix_scale"):
-            weight_matrix = weight_matrix * self.env.weight_matrix_scale
        self.dmp.set_weights(weight_matrix, goal_pos)
        trajectory, velocity = self.dmp.reference_trajectory(self.t)
diff --git a/dmp_env_wrapper_example.py b/dmp_env_wrapper_example.py
index e63e11c..b971574 100644
--- a/dmp_env_wrapper_example.py
+++ b/dmp_env_wrapper_example.py
@@ -5,17 +5,19 @@
 
 if __name__ == "__main__":
 
-    n_samples = 10
+    n_samples = 1
     n_cpus = 4
     dim = 25
 
-    env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
-                            n_samples=n_samples)
+    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
+    #                         n_samples=n_samples)
+
+    test_env = make_viapointreacher_env(0)()
 
     params = np.random.randn(n_samples, dim)
     # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
 
-    # env.reset()
-    out = env(params)
+    test_env.rollout(params, render=True)
 
-    print(out)
+    # out = env(params)
+    # print(out)
diff --git a/dmp_pd_control_example.py b/dmp_pd_control_example.py
index 33abe6e..303f979 100644
--- a/dmp_pd_control_example.py
+++ b/dmp_pd_control_example.py
@@ -5,29 +5,23 @@
 
 if __name__ == "__main__":
 
-    dim = 24
+    dim = 15
     n_cpus = 4
 
-    n_samples = 10
+    n_samples = 1
 
     vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
                                 n_samples=n_samples)
 
-    # params = 10 * np.random.randn(n_samples, dim)
-    params = np.array([[ -4.51280364,  24.43701373,  15.73282129, -12.13020392,
-                         -8.57305795,   2.79806606,  -6.38613201,   5.99309385,
-                         -2.05631886,  24.71684748,  14.05989949, -14.60456967,
-                         10.51933419,  -2.43715355,  -6.0767578 ,  13.06498129,
-                          6.18038374,  11.4153859 ,   1.40753639,   5.57082387,
-                          9.81989309,   3.60558787,  -9.66996754,  14.28519904]])
+    params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
 
-    out = vec_env(params)
-    print(out)
+    rewards, infos = vec_env(params)
+    print(rewards)
 
     #
-    non_vec_env = make_simple_env(0, 0)()
+    # non_vec_env = make_simple_env(0, 0)()
     #
     # params = 10 * np.random.randn(dim)
-    out2 = non_vec_env.rollout(params, render=True)
+    # out2 = non_vec_env.rollout(params, render=True)
 
-    print(out2)
+    # print(out2)
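
Note on the weight-scale refactor: the env-side `weight_matrix_scale` attribute and the `hasattr` check inside `DmpEnvWrapper.rollout` are replaced by a `weights_scale` argument passed once when the wrapper is built (see `make_holereacher_env` above). The snippet below is a minimal sketch of that idea, not the actual `DmpEnvWrapper` code; the class name `ScaledDmpWrapperSketch`, the weights-then-goal parameter layout, and the basis count are illustrative assumptions.

```python
import numpy as np


class ScaledDmpWrapperSketch:
    """Minimal sketch of a DMP-style wrapper that applies a fixed weight scale.

    NOT the real DmpEnvWrapper; it only illustrates moving the scaling from an
    env attribute (weight_matrix_scale) into a constructor argument
    (weights_scale), as done for make_holereacher_env in this diff.
    """

    def __init__(self, num_dof, num_basis, weights_scale=1.0, learn_goal=True):
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.weights_scale = weights_scale
        self.learn_goal = learn_goal

    def goal_and_weights(self, params):
        # assumed layout: flat basis weights, optionally followed by a goal position
        params = np.atleast_2d(params)
        if self.learn_goal:
            goal_pos = params[0, -self.num_dof:]
            weights = params[:, :-self.num_dof]
        else:
            goal_pos = None
            weights = params
        weight_matrix = weights.reshape(self.num_dof, self.num_basis)
        # the scale is applied in one place, so rollout() no longer needs a
        # hasattr(self.env, "weight_matrix_scale") special case
        return goal_pos, weight_matrix * self.weights_scale


# usage: a hole-reacher-like setup with 5 DoF, 5 basis functions, scale 100
wrapper = ScaledDmpWrapperSketch(num_dof=5, num_basis=5, weights_scale=100)
params = np.random.randn(5 * 5 + 5)   # 25 weights + 5 goal entries
goal, w = wrapper.goal_and_weights(params)
print(goal.shape, w.shape)            # (5,) (5, 5)
```

With this layout, `weights_scale=100` in `make_holereacher_env` takes over the role of the per-env factor of 50 that `rollout` previously applied implicitly.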