updates
This commit is contained in:
parent
420fe10506
commit
46fc642c36
@ -35,7 +35,6 @@ class HoleReacher(gym.Env):
|
||||
self._angle_velocity = None
|
||||
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
|
||||
self.start_vel = np.zeros(self.num_links)
|
||||
self.weight_matrix_scale = 50 # For the HoleReacher, the DMP weights become quite large compared to the values of the goal attractor. This scaling ensures they are on a similar scale for the optimizer.
|
||||
|
||||
self.dt = 0.01
|
||||
self.time_limit = 2
|
||||
@ -68,6 +67,9 @@ class HoleReacher(gym.Env):
|
||||
def end_effector(self):
|
||||
return self._joints[self.num_links].T
|
||||
|
||||
def configure(self, context):
|
||||
pass
|
||||
|
||||
def reset(self):
|
||||
self._joint_angles = self.start_pos
|
||||
self._angle_velocity = self.start_vel
|
||||
|
@ -62,7 +62,8 @@ def make_holereacher_env(rank, seed=0):
|
||||
learn_goal=True,
|
||||
alpha_phase=2,
|
||||
start_pos=_env.start_pos,
|
||||
policy_type="velocity"
|
||||
policy_type="velocity",
|
||||
weights_scale=100,
|
||||
)
|
||||
_env.seed(seed + rank)
|
||||
return _env
|
||||
|
@ -71,10 +71,10 @@ class BallInACupReward(alr_reward_fct.AlrReward):
|
||||
dist_ctxt = self.dists_ctxt[-1]
|
||||
|
||||
# cost = self._get_stage_wise_cost(ball_in_cup, min_dist, dist_final, dist_ctxt)
|
||||
cost = 2 * (0.33 * min_dist + 0.33 * dist_final + 0.33 * dist_ctxt)
|
||||
cost = 2 * (0.5 * min_dist + 0.5 * dist_final + 0.1 * dist_ctxt)
|
||||
reward = np.exp(-1 * cost) - 1e-4 * action_cost
|
||||
stop_sim = True
|
||||
success = dist_final < 0.05 and ball_in_cup
|
||||
success = dist_final < 0.05 and dist_ctxt < 0.05
|
||||
else:
|
||||
reward = - 1e-4 * action_cost
|
||||
success = False
|
||||
|
@ -86,8 +86,6 @@ class DmpEnvWrapper(gym.Wrapper):
|
||||
def rollout(self, params, context=None, render=False):
|
||||
""" This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
|
||||
goal_pos, weight_matrix = self.goal_and_weights(params)
|
||||
if hasattr(self.env, "weight_matrix_scale"):
|
||||
weight_matrix = weight_matrix * self.env.weight_matrix_scale
|
||||
self.dmp.set_weights(weight_matrix, goal_pos)
|
||||
trajectory, velocity = self.dmp.reference_trajectory(self.t)
|
||||
|
||||
|
@ -5,17 +5,19 @@ import numpy as np
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
n_samples = 10
|
||||
n_samples = 1
|
||||
n_cpus = 4
|
||||
dim = 25
|
||||
|
||||
env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
||||
n_samples=n_samples)
|
||||
# env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
||||
# n_samples=n_samples)
|
||||
|
||||
test_env = make_viapointreacher_env(0)()
|
||||
|
||||
params = np.random.randn(n_samples, dim)
|
||||
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||
|
||||
# env.reset()
|
||||
out = env(params)
|
||||
test_env.rollout(params, render=True)
|
||||
|
||||
print(out)
|
||||
# out = env(params)
|
||||
# print(out)
|
||||
|
@ -5,29 +5,23 @@ import numpy as np
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
dim = 24
|
||||
dim = 15
|
||||
n_cpus = 4
|
||||
|
||||
n_samples = 10
|
||||
n_samples = 1
|
||||
|
||||
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
||||
n_samples=n_samples)
|
||||
|
||||
# params = 10 * np.random.randn(n_samples, dim)
|
||||
params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
|
||||
-8.57305795, 2.79806606, -6.38613201, 5.99309385,
|
||||
-2.05631886, 24.71684748, 14.05989949, -14.60456967,
|
||||
10.51933419, -2.43715355, -6.0767578 , 13.06498129,
|
||||
6.18038374, 11.4153859 , 1.40753639, 5.57082387,
|
||||
9.81989309, 3.60558787, -9.66996754, 14.28519904]])
|
||||
params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
|
||||
|
||||
out = vec_env(params)
|
||||
print(out)
|
||||
rewards, infos = vec_env(params)
|
||||
print(rewards)
|
||||
#
|
||||
non_vec_env = make_simple_env(0, 0)()
|
||||
# non_vec_env = make_simple_env(0, 0)()
|
||||
#
|
||||
# params = 10 * np.random.randn(dim)
|
||||
|
||||
out2 = non_vec_env.rollout(params, render=True)
|
||||
# out2 = non_vec_env.rollout(params, render=True)
|
||||
|
||||
print(out2)
|
||||
# print(out2)
|
||||
|
Loading…
Reference in New Issue
Block a user