updates
This commit is contained in:
parent
420fe10506
commit
46fc642c36
@ -35,7 +35,6 @@ class HoleReacher(gym.Env):
|
|||||||
self._angle_velocity = None
|
self._angle_velocity = None
|
||||||
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
|
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
|
||||||
self.start_vel = np.zeros(self.num_links)
|
self.start_vel = np.zeros(self.num_links)
|
||||||
self.weight_matrix_scale = 50 # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
|
|
||||||
|
|
||||||
self.dt = 0.01
|
self.dt = 0.01
|
||||||
self.time_limit = 2
|
self.time_limit = 2
|
||||||
@ -68,6 +67,9 @@ class HoleReacher(gym.Env):
|
|||||||
def end_effector(self):
|
def end_effector(self):
|
||||||
return self._joints[self.num_links].T
|
return self._joints[self.num_links].T
|
||||||
|
|
||||||
|
def configure(self, context):
|
||||||
|
pass
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._joint_angles = self.start_pos
|
self._joint_angles = self.start_pos
|
||||||
self._angle_velocity = self.start_vel
|
self._angle_velocity = self.start_vel
|
||||||
|
@ -62,7 +62,8 @@ def make_holereacher_env(rank, seed=0):
|
|||||||
learn_goal=True,
|
learn_goal=True,
|
||||||
alpha_phase=2,
|
alpha_phase=2,
|
||||||
start_pos=_env.start_pos,
|
start_pos=_env.start_pos,
|
||||||
policy_type="velocity"
|
policy_type="velocity",
|
||||||
|
weights_scale=100,
|
||||||
)
|
)
|
||||||
_env.seed(seed + rank)
|
_env.seed(seed + rank)
|
||||||
return _env
|
return _env
|
||||||
|
@ -71,10 +71,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
|||||||
dist_ctxt = self.dists_ctxt[-1]
|
dist_ctxt = self.dists_ctxt[-1]
|
||||||
|
|
||||||
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
|
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
|
||||||
cost = 2 * (0.33 * min_dist + 0.33 * dist_final + 0.33 * dist_ctxt)
|
cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)
|
||||||
reward = np.exp(-1 * cost) - 1e-4 * action_cost
|
reward = np.exp(-1 * cost) - 1e-4 * action_cost
|
||||||
stop_sim = True
|
stop_sim = True
|
||||||
success = dist_final < 0.05 and ball_in_cup
|
success = dist_final < 0.05 and dist_ctxt < 0.05
|
||||||
else:
|
else:
|
||||||
reward = - 1e-4 * action_cost
|
reward = - 1e-4 * action_cost
|
||||||
success = False
|
success = False
|
||||||
|
@ -86,8 +86,6 @@ class DmpEnvWrapper(gym.Wrapper):
|
|||||||
def rollout(self, params, context=None, render=False):
|
def rollout(self, params, context=None, render=False):
|
||||||
""" This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
|
""" This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
|
||||||
goal_pos, weight_matrix = self.goal_and_weights(params)
|
goal_pos, weight_matrix = self.goal_and_weights(params)
|
||||||
if hasattr(self.env, "weight_matrix_scale"):
|
|
||||||
weight_matrix = weight_matrix * self.env.weight_matrix_scale
|
|
||||||
self.dmp.set_weights(weight_matrix, goal_pos)
|
self.dmp.set_weights(weight_matrix, goal_pos)
|
||||||
trajectory, velocity = self.dmp.reference_trajectory(self.t)
|
trajectory, velocity = self.dmp.reference_trajectory(self.t)
|
||||||
|
|
||||||
|
@ -5,17 +5,19 @@ import numpy as np
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
n_samples = 10
|
n_samples = 1
|
||||||
n_cpus = 4
|
n_cpus = 4
|
||||||
dim = 25
|
dim = 25
|
||||||
|
|
||||||
env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
# env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
||||||
n_samples=n_samples)
|
# n_samples=n_samples)
|
||||||
|
|
||||||
|
test_env = make_viapointreacher_env(0)()
|
||||||
|
|
||||||
params = np.random.randn(n_samples, dim)
|
params = np.random.randn(n_samples, dim)
|
||||||
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||||
|
|
||||||
# env.reset()
|
test_env.rollout(params, render=True)
|
||||||
out = env(params)
|
|
||||||
|
|
||||||
print(out)
|
# out = env(params)
|
||||||
|
# print(out)
|
||||||
|
@ -5,29 +5,23 @@ import numpy as np
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
dim = 24
|
dim = 15
|
||||||
n_cpus = 4
|
n_cpus = 4
|
||||||
|
|
||||||
n_samples = 10
|
n_samples = 1
|
||||||
|
|
||||||
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
||||||
n_samples=n_samples)
|
n_samples=n_samples)
|
||||||
|
|
||||||
# params = 10 * np.random.randn(n_samples, dim)
|
params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
|
||||||
params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
|
|
||||||
-8.57305795, 2.79806606, -6.38613201, 5.99309385,
|
|
||||||
-2.05631886, 24.71684748, 14.05989949, -14.60456967,
|
|
||||||
10.51933419, -2.43715355, -6.0767578 , 13.06498129,
|
|
||||||
6.18038374, 11.4153859 , 1.40753639, 5.57082387,
|
|
||||||
9.81989309, 3.60558787, -9.66996754, 14.28519904]])
|
|
||||||
|
|
||||||
out = vec_env(params)
|
rewards, infos = vec_env(params)
|
||||||
print(out)
|
print(rewards)
|
||||||
#
|
#
|
||||||
non_vec_env = make_simple_env(0, 0)()
|
# non_vec_env = make_simple_env(0, 0)()
|
||||||
#
|
#
|
||||||
# params = 10 * np.random.randn(dim)
|
# params = 10 * np.random.randn(dim)
|
||||||
|
|
||||||
out2 = non_vec_env.rollout(params, render=True)
|
# out2 = non_vec_env.rollout(params, render=True)
|
||||||
|
|
||||||
print(out2)
|
# print(out2)
|
||||||
|
Loading…
Reference in New Issue
Block a user