Maximilian Huettenrauch 2021-02-17 17:48:05 +01:00
parent 420fe10506
commit 46fc642c36
6 changed files with 23 additions and 26 deletions

View File

@@ -35,7 +35,6 @@ class HoleReacher(gym.Env):
         self._angle_velocity = None
         self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
         self.start_vel = np.zeros(self.num_links)
-        self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer

         self.dt = 0.01
         self.time_limit = 2

@@ -68,6 +67,9 @@ class HoleReacher(gym.Env):
     def end_effector(self):
         return self._joints[self.num_links].T

+    def configure(self, context):
+        pass
+
     def reset(self):
         self._joint_angles = self.start_pos
         self._angle_velocity = self.start_vel

View File

@@ -62,7 +62,8 @@ def make_holereacher_env(rank, seed=0):
                             learn_goal=True,
                             alpha_phase=2,
                             start_pos=_env.start_pos,
-                            policy_type="velocity"
+                            policy_type="velocity",
+                            weights_scale=100,
                             )
        _env.seed(seed + rank)
        return _env

View File

@@ -71,10 +71,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
             dist_ctxt = self.dists_ctxt[-1]

             # cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
-            cost = 2 * (0.33 * min_dist + 0.33 * dist_final + 0.33 * dist_ctxt)
+            cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)
             reward = np.exp(-1 * cost) - 1e-4 * action_cost
             stop_sim = True
-            success = dist_final < 0.05 and ball_in_cup
+            success = dist_final < 0.05 and dist_ctxt < 0.05
         else:
             reward = - 1e-4 * action_cost
             success = False
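The re-weighted cost (0.5/0.5/0.1 instead of a uniform 0.33 each) emphasizes the minimum and final ball distances over the context distance, and success now checks dist_ctxt < 0.05 instead of the ball_in_cup flag. A worked example of the new terms with made-up distances (not from an actual rollout):

import numpy as np

# Illustrative inputs, not simulator output.
min_dist, dist_final, dist_ctxt, action_cost = 0.2, 0.1, 0.04, 1.0

cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)  # = 0.308
reward = np.exp(-1 * cost) - 1e-4 * action_cost                   # ~ 0.7348
success = dist_final < 0.05 and dist_ctxt < 0.05                  # False here

print(cost, reward, success)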

View File

@@ -86,8 +86,6 @@ class DmpEnvWrapper(gym.Wrapper):
     def rollout(self, params, context=None, render=False):
         """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""

         goal_pos, weight_matrix = self.goal_and_weights(params)
-        if hasattr(self.env, "weight_matrix_scale"):
-            weight_matrix = weight_matrix * self.env.weight_matrix_scale
         self.dmp.set_weights(weight_matrix, goal_pos)
         trajectory, velocity = self.dmp.reference_trajectory(self.t)
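With the hasattr branch gone, rollout no longer reaches into the wrapped environment for a scale factor; the scaling presumably happens once inside the wrapper, driven by the weights_scale argument seen in make_holereacher_env. A hypothetical sketch of that arrangement (only weights_scale comes from this commit; every other name below is illustrative):

import numpy as np

class DmpWrapperSketch:
    """Hypothetical stand-in for DmpEnvWrapper's scaling logic."""

    def __init__(self, num_goal_params=5, weights_scale=1.0):
        self.num_goal_params = num_goal_params  # illustrative
        self.weights_scale = weights_scale      # from make_holereacher_env (100)

    def goal_and_weights(self, params):
        # Split the flat parameter vector into goal and basis weights,
        # scaling the weights once here instead of in rollout().
        goal_pos = params[-self.num_goal_params:]
        weight_matrix = params[:-self.num_goal_params] * self.weights_scale
        return goal_pos, weight_matrix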

View File

@@ -5,17 +5,19 @@ import numpy as np

 if __name__ == "__main__":
-    n_samples = 10
+    n_samples = 1
     n_cpus = 4
     dim = 25

-    env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
-                            n_samples=n_samples)
+    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
+    #                         n_samples=n_samples)
+
+    test_env = make_viapointreacher_env(0)()

     params = np.random.randn(n_samples, dim)
     # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

-    # env.reset()
-    out = env(params)
-    print(out)
+    test_env.rollout(params, render=True)
+
+    # out = env(params)
+    # print(out)

View File

@@ -5,29 +5,23 @@ import numpy as np

 if __name__ == "__main__":
-    dim = 24
+    dim = 15
     n_cpus = 4
-    n_samples = 10
+    n_samples = 1

     vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
                                 n_samples=n_samples)

-    # params = 10 * np.random.randn(n_samples, dim)
-    params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
-                         -8.57305795,  2.79806606, -6.38613201,   5.99309385,
-                         -2.05631886, 24.71684748, 14.05989949, -14.60456967,
-                         10.51933419, -2.43715355, -6.0767578 ,  13.06498129,
-                          6.18038374, 11.4153859 ,  1.40753639,   5.57082387,
-                          9.81989309,  3.60558787, -9.66996754,  14.28519904]])
+    params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))

-    out = vec_env(params)
-    print(out)
+    rewards, infos = vec_env(params)
+    print(rewards)

     #
-    non_vec_env = make_simple_env(0, 0)()
+    # non_vec_env = make_simple_env(0, 0)()
     #
     # params = 10 * np.random.randn(dim)
-    out2 = non_vec_env.rollout(params, render=True)
-    print(out2)
+    # out2 = non_vec_env.rollout(params, render=True)
+    # print(out2)