updates
This commit is contained in:
parent
95250af31c
commit
77d0cbd00a
@ -1,8 +1,9 @@
|
|||||||
from alr_envs.classic_control.hole_reacher import HoleReacher
|
from alr_envs.classic_control.hole_reacher import HoleReacher
|
||||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
|
from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
|
||||||
|
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
||||||
|
|
||||||
|
|
||||||
def make_env(rank, seed=0):
|
def make_viapointreacher_env(rank, seed=0):
|
||||||
"""
|
"""
|
||||||
Utility function for multiprocessed env.
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
@ -14,7 +15,38 @@ def make_env(rank, seed=0):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def _init():
|
def _init():
|
||||||
env = HoleReacher(num_links=5,
|
_env = ViaPointReacher(num_links=5,
|
||||||
|
allow_self_collision=False,
|
||||||
|
collision_penalty=1000)
|
||||||
|
|
||||||
|
_env = DmpEnvWrapper(_env,
|
||||||
|
num_dof=5,
|
||||||
|
num_basis=5,
|
||||||
|
duration=2,
|
||||||
|
alpha_phase=2,
|
||||||
|
dt=_env.dt,
|
||||||
|
start_pos=_env.start_pos,
|
||||||
|
learn_goal=False,
|
||||||
|
policy_type="velocity")
|
||||||
|
_env.seed(seed + rank)
|
||||||
|
return _env
|
||||||
|
|
||||||
|
return _init
|
||||||
|
|
||||||
|
|
||||||
|
def make_holereacher_env(rank, seed=0):
|
||||||
|
"""
|
||||||
|
Utility function for multiprocessed env.
|
||||||
|
|
||||||
|
:param env_id: (str) the environment ID
|
||||||
|
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||||
|
:param seed: (int) the initial seed for RNG
|
||||||
|
:param rank: (int) index of the subprocess
|
||||||
|
:returns a function that generates an environment
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _init():
|
||||||
|
_env = HoleReacher(num_links=5,
|
||||||
allow_self_collision=False,
|
allow_self_collision=False,
|
||||||
allow_wall_collision=False,
|
allow_wall_collision=False,
|
||||||
hole_width=0.15,
|
hole_width=0.15,
|
||||||
@ -22,13 +54,17 @@ def make_env(rank, seed=0):
|
|||||||
hole_x=1,
|
hole_x=1,
|
||||||
collision_penalty=100000)
|
collision_penalty=100000)
|
||||||
|
|
||||||
env = DmpEnvWrapperVel(env,
|
_env = DmpEnvWrapper(_env,
|
||||||
num_dof=5,
|
num_dof=5,
|
||||||
num_basis=5,
|
num_basis=5,
|
||||||
duration=2,
|
duration=2,
|
||||||
dt=env.dt,
|
dt=_env.dt,
|
||||||
learn_goal=True)
|
learn_goal=True,
|
||||||
env.seed(seed + rank)
|
alpha_phase=2,
|
||||||
return env
|
start_pos=_env.start_pos,
|
||||||
|
policy_type="velocity"
|
||||||
|
)
|
||||||
|
_env.seed(seed + rank)
|
||||||
|
return _env
|
||||||
|
|
||||||
return _init
|
return _init
|
||||||
|
@ -13,29 +13,24 @@ def intersect(A, B, C, D):
|
|||||||
return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
|
return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
|
||||||
|
|
||||||
|
|
||||||
class HoleReacher(gym.Env):
|
class ViaPointReacher(gym.Env):
|
||||||
|
|
||||||
def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False,
|
def __init__(self, num_links, allow_self_collision=False,
|
||||||
allow_wall_collision=False, collision_penalty=1000):
|
collision_penalty=1000):
|
||||||
self.hole_x = hole_x # x-position of center of hole
|
|
||||||
self.hole_width = hole_width # width of hole
|
|
||||||
self.hole_depth = hole_depth # depth of hole
|
|
||||||
self.num_links = num_links
|
self.num_links = num_links
|
||||||
self.link_lengths = np.ones((num_links, 1))
|
self.link_lengths = np.ones((num_links, 1))
|
||||||
self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
|
|
||||||
self.top_center_of_hole = np.hstack([hole_x, 0])
|
|
||||||
self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0])
|
|
||||||
self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
|
|
||||||
self.allow_self_collision = allow_self_collision
|
self.allow_self_collision = allow_self_collision
|
||||||
self.allow_wall_collision = allow_wall_collision
|
|
||||||
self.collision_penalty = collision_penalty
|
self.collision_penalty = collision_penalty
|
||||||
|
|
||||||
|
self.via_point = np.ones(2)
|
||||||
|
self.goal_point = np.array((num_links, 0))
|
||||||
|
|
||||||
self._joints = None
|
self._joints = None
|
||||||
self._joint_angles = None
|
self._joint_angles = None
|
||||||
self._angle_velocity = None
|
self._angle_velocity = None
|
||||||
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
|
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
|
||||||
self.start_vel = np.zeros(self.num_links)
|
self.start_vel = np.zeros(self.num_links)
|
||||||
self.weight_matrix_scale = 50 # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
|
self.weight_matrix_scale = 1
|
||||||
|
|
||||||
self.dt = 0.01
|
self.dt = 0.01
|
||||||
self.time_limit = 2
|
self.time_limit = 2
|
||||||
@ -52,22 +47,14 @@ class HoleReacher(gym.Env):
|
|||||||
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
|
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
|
||||||
|
|
||||||
self.fig = None
|
self.fig = None
|
||||||
rect_1 = patches.Rectangle((-self.num_links, -1),
|
|
||||||
self.num_links + self.hole_x - self.hole_width / 2, 1,
|
|
||||||
fill=True, edgecolor='k', facecolor='k')
|
|
||||||
rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
|
|
||||||
self.num_links - self.hole_x + self.hole_width / 2, 1,
|
|
||||||
fill=True, edgecolor='k', facecolor='k')
|
|
||||||
rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
|
|
||||||
1 - self.hole_depth,
|
|
||||||
fill=True, edgecolor='k', facecolor='k')
|
|
||||||
|
|
||||||
self.patches = [rect_1, rect_2, rect_3]
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def end_effector(self):
|
def end_effector(self):
|
||||||
return self._joints[self.num_links].T
|
return self._joints[self.num_links].T
|
||||||
|
|
||||||
|
def configure(self, context):
|
||||||
|
pass
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self._joint_angles = self.start_pos
|
self._joint_angles = self.start_pos
|
||||||
self._angle_velocity = self.start_vel
|
self._angle_velocity = self.start_vel
|
||||||
@ -94,16 +81,16 @@ class HoleReacher(gym.Env):
|
|||||||
|
|
||||||
dist_reward = 0
|
dist_reward = 0
|
||||||
if not self._is_collided:
|
if not self._is_collided:
|
||||||
if self._steps == 180:
|
if self._steps == 100:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
dist_reward = np.linalg.norm(self.end_effector - self.via_point)
|
||||||
else:
|
if self._steps == 200:
|
||||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
|
||||||
|
|
||||||
reward = - dist_reward ** 2
|
reward = - dist_reward ** 2
|
||||||
|
|
||||||
reward -= 1e-6 * np.sum(acc**2)
|
reward -= 1e-6 * np.sum(acc**2)
|
||||||
|
|
||||||
if self._steps == 180:
|
if self._steps == 200:
|
||||||
reward -= 0.1 * np.sum(vel**2) ** 2
|
reward -= 0.1 * np.sum(vel**2) ** 2
|
||||||
|
|
||||||
if self._is_collided:
|
if self._is_collided:
|
||||||
@ -129,17 +116,13 @@ class HoleReacher(gym.Env):
|
|||||||
self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
|
self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
|
||||||
|
|
||||||
self_collision = False
|
self_collision = False
|
||||||
wall_collision = False
|
|
||||||
|
|
||||||
if not self.allow_self_collision:
|
if not self.allow_self_collision:
|
||||||
self_collision = self.check_self_collision(line_points_in_taskspace)
|
self_collision = self.check_self_collision(line_points_in_taskspace)
|
||||||
if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
|
if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
|
||||||
self_collision = True
|
self_collision = True
|
||||||
|
|
||||||
if not self.allow_wall_collision:
|
self._is_collided = self_collision
|
||||||
wall_collision = self.check_wall_collision(line_points_in_taskspace)
|
|
||||||
|
|
||||||
self._is_collided = self_collision or wall_collision
|
|
||||||
|
|
||||||
def _get_obs(self):
|
def _get_obs(self):
|
||||||
theta = self._joint_angles
|
theta = self._joint_angles
|
||||||
@ -147,7 +130,8 @@ class HoleReacher(gym.Env):
|
|||||||
np.cos(theta),
|
np.cos(theta),
|
||||||
np.sin(theta),
|
np.sin(theta),
|
||||||
self._angle_velocity,
|
self._angle_velocity,
|
||||||
self.end_effector - self.bottom_center_of_hole,
|
self.end_effector - self.via_point,
|
||||||
|
self.end_effector - self.goal_point,
|
||||||
self._steps
|
self._steps
|
||||||
])
|
])
|
||||||
|
|
||||||
@ -237,17 +221,14 @@ class HoleReacher(gym.Env):
|
|||||||
|
|
||||||
if mode == "human":
|
if mode == "human":
|
||||||
plt.cla()
|
plt.cla()
|
||||||
plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}")
|
plt.title(f"Iteration: {self._steps}")
|
||||||
|
|
||||||
# Arm
|
# Arm
|
||||||
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
|
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
|
||||||
|
|
||||||
# Add the patch to the Axes
|
|
||||||
[plt.gca().add_patch(rect) for rect in self.patches]
|
|
||||||
|
|
||||||
lim = np.sum(self.link_lengths) + 0.5
|
lim = np.sum(self.link_lengths) + 0.5
|
||||||
plt.xlim([-lim, lim])
|
plt.xlim([-lim, lim])
|
||||||
plt.ylim([-1.1, lim])
|
plt.ylim([-lim, lim])
|
||||||
# plt.draw()
|
# plt.draw()
|
||||||
plt.pause(1e-4) # pushes window to foreground, which is annoying.
|
plt.pause(1e-4) # pushes window to foreground, which is annoying.
|
||||||
# self.fig.canvas.flush_events()
|
# self.fig.canvas.flush_events()
|
||||||
@ -293,14 +274,14 @@ class HoleReacher(gym.Env):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
nl = 5
|
nl = 5
|
||||||
render_mode = "human" # "human" or "partial" or "final"
|
render_mode = "human" # "human" or "partial" or "final"
|
||||||
env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1)
|
env = ViaPointReacher(num_links=nl, allow_self_collision=False)
|
||||||
env.reset()
|
env.reset()
|
||||||
# env.render(mode=render_mode)
|
env.render(mode=render_mode)
|
||||||
|
|
||||||
for i in range(200):
|
for i in range(300):
|
||||||
# objective.load_result("/tmp/cma")
|
# objective.load_result("/tmp/cma")
|
||||||
# test with random actions
|
# test with random actions
|
||||||
ac = 2 * env.action_space.sample()
|
ac = env.action_space.sample()
|
||||||
# ac[0] += np.pi/2
|
# ac[0] += np.pi/2
|
||||||
obs, rew, d, info = env.step(ac)
|
obs, rew, d, info = env.step(ac)
|
||||||
env.render(mode=render_mode)
|
env.render(mode=render_mode)
|
||||||
|
@ -1,52 +1,19 @@
|
|||||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
from alr_envs.classic_control.utils import make_viapointreacher_env
|
||||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
|
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
|
||||||
from alr_envs.classic_control.hole_reacher import HoleReacher
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
def make_env(rank, seed=0):
|
n_samples = 10
|
||||||
"""
|
n_cpus = 4
|
||||||
Utility function for multiprocessed env.
|
dim = 25
|
||||||
|
|
||||||
:param env_id: (str) the environment ID
|
env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
||||||
:param num_env: (int) the number of environments you wish to have in subprocesses
|
n_samples=n_samples)
|
||||||
:param seed: (int) the inital seed for RNG
|
|
||||||
:param rank: (int) index of the subprocess
|
|
||||||
"""
|
|
||||||
def _init():
|
|
||||||
_env = HoleReacher(num_links=5,
|
|
||||||
allow_self_collision=False,
|
|
||||||
allow_wall_collision=False,
|
|
||||||
hole_width=0.15,
|
|
||||||
hole_depth=1,
|
|
||||||
hole_x=1)
|
|
||||||
|
|
||||||
_env = DmpEnvWrapper(_env,
|
params = np.random.randn(n_samples, dim)
|
||||||
num_dof=5,
|
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||||
num_basis=5,
|
|
||||||
duration=2,
|
|
||||||
dt=_env.dt,
|
|
||||||
learn_goal=True,
|
|
||||||
alpha_phase=2,
|
|
||||||
start_pos=_env.start_pos,
|
|
||||||
policy_type="velocity"
|
|
||||||
)
|
|
||||||
_env.seed(seed + rank)
|
|
||||||
return _env
|
|
||||||
return _init
|
|
||||||
|
|
||||||
n_samples = 4
|
|
||||||
|
|
||||||
env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
|
|
||||||
n_samples=n_samples,
|
|
||||||
context="spawn",
|
|
||||||
shared_memory=False,
|
|
||||||
worker=_worker)
|
|
||||||
|
|
||||||
# params = np.random.randn(4, 25)
|
|
||||||
params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
|
||||||
|
|
||||||
# env.reset()
|
# env.reset()
|
||||||
out = env(params)
|
out = env(params)
|
||||||
|
@ -1,55 +1,32 @@
|
|||||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env
|
||||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
|
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
|
||||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
def make_env(rank, seed=0):
|
|
||||||
"""
|
|
||||||
Utility function for multiprocessed env.
|
|
||||||
|
|
||||||
:param env_id: (str) the environment ID
|
|
||||||
:param num_env: (int) the number of environments you wish to have in subprocesses
|
|
||||||
:param seed: (int) the inital seed for RNG
|
|
||||||
:param rank: (int) index of the subprocess
|
|
||||||
"""
|
|
||||||
def _init():
|
|
||||||
_env = ALRBallInACupEnv()
|
|
||||||
|
|
||||||
_env = DmpEnvWrapper(_env,
|
|
||||||
num_dof=3,
|
|
||||||
num_basis=8,
|
|
||||||
duration=3.5,
|
|
||||||
alpha_phase=3,
|
|
||||||
dt=_env.dt,
|
|
||||||
learn_goal=False,
|
|
||||||
start_pos=_env.start_pos[1::2],
|
|
||||||
final_pos=_env.start_pos[1::2],
|
|
||||||
policy_type="motor"
|
|
||||||
)
|
|
||||||
_env.seed(seed + rank)
|
|
||||||
return _env
|
|
||||||
return _init
|
|
||||||
|
|
||||||
dim = 24
|
dim = 24
|
||||||
|
n_cpus = 4
|
||||||
|
|
||||||
n_samples = 10
|
n_samples = 10
|
||||||
|
|
||||||
vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
|
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
||||||
n_samples=n_samples,
|
n_samples=n_samples)
|
||||||
context="spawn",
|
|
||||||
shared_memory=False,
|
|
||||||
worker=_worker)
|
|
||||||
|
|
||||||
params = 10 * np.random.randn(n_samples, dim)
|
# params = 10 * np.random.randn(n_samples, dim)
|
||||||
|
params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
|
||||||
|
-8.57305795, 2.79806606, -6.38613201, 5.99309385,
|
||||||
|
-2.05631886, 24.71684748, 14.05989949, -14.60456967,
|
||||||
|
10.51933419, -2.43715355, -6.0767578 , 13.06498129,
|
||||||
|
6.18038374, 11.4153859 , 1.40753639, 5.57082387,
|
||||||
|
9.81989309, 3.60558787, -9.66996754, 14.28519904]])
|
||||||
|
|
||||||
out = vec_env(params)
|
out = vec_env(params)
|
||||||
|
print(out)
|
||||||
non_vec_env = make_env(0, 0)()
|
#
|
||||||
|
non_vec_env = make_simple_env(0, 0)()
|
||||||
params = 10 * np.random.randn(dim)
|
#
|
||||||
|
# params = 10 * np.random.randn(dim)
|
||||||
|
|
||||||
out2 = non_vec_env.rollout(params, render=True)
|
out2 = non_vec_env.rollout(params, render=True)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user