This commit is contained in:
Maximilian Huettenrauch 2021-02-15 09:03:19 +01:00
parent 95250af31c
commit 77d0cbd00a
4 changed files with 103 additions and 142 deletions

View File

@ -1,8 +1,9 @@
from alr_envs.classic_control.hole_reacher import HoleReacher from alr_envs.classic_control.hole_reacher import HoleReacher
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
def make_env(rank, seed=0): def make_viapointreacher_env(rank, seed=0):
""" """
Utility function for multiprocessed env. Utility function for multiprocessed env.
@ -14,7 +15,38 @@ def make_env(rank, seed=0):
""" """
def _init(): def _init():
env = HoleReacher(num_links=5, _env = ViaPointReacher(num_links=5,
allow_self_collision=False,
collision_penalty=1000)
_env = DmpEnvWrapper(_env,
num_dof=5,
num_basis=5,
duration=2,
alpha_phase=2,
dt=_env.dt,
start_pos=_env.start_pos,
learn_goal=False,
policy_type="velocity")
_env.seed(seed + rank)
return _env
return _init
def make_holereacher_env(rank, seed=0):
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environments you wish to have in subprocesses
:param seed: (int) the initial seed for RNG
:param rank: (int) index of the subprocess
:returns a function that generates an environment
"""
def _init():
_env = HoleReacher(num_links=5,
allow_self_collision=False, allow_self_collision=False,
allow_wall_collision=False, allow_wall_collision=False,
hole_width=0.15, hole_width=0.15,
@ -22,13 +54,17 @@ def make_env(rank, seed=0):
hole_x=1, hole_x=1,
collision_penalty=100000) collision_penalty=100000)
env = DmpEnvWrapperVel(env, _env = DmpEnvWrapper(_env,
num_dof=5, num_dof=5,
num_basis=5, num_basis=5,
duration=2, duration=2,
dt=env.dt, dt=_env.dt,
learn_goal=True) learn_goal=True,
env.seed(seed + rank) alpha_phase=2,
return env start_pos=_env.start_pos,
policy_type="velocity"
)
_env.seed(seed + rank)
return _env
return _init return _init

View File

@ -13,29 +13,24 @@ def intersect(A, B, C, D):
return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D) return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
class HoleReacher(gym.Env): class ViaPointReacher(gym.Env):
def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False, def __init__(self, num_links, allow_self_collision=False,
allow_wall_collision=False, collision_penalty=1000): collision_penalty=1000):
self.hole_x = hole_x # x-position of center of hole
self.hole_width = hole_width # width of hole
self.hole_depth = hole_depth # depth of hole
self.num_links = num_links self.num_links = num_links
self.link_lengths = np.ones((num_links, 1)) self.link_lengths = np.ones((num_links, 1))
self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
self.top_center_of_hole = np.hstack([hole_x, 0])
self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0])
self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
self.allow_self_collision = allow_self_collision self.allow_self_collision = allow_self_collision
self.allow_wall_collision = allow_wall_collision
self.collision_penalty = collision_penalty self.collision_penalty = collision_penalty
self.via_point = np.ones(2)
self.goal_point = np.array((num_links, 0))
self._joints = None self._joints = None
self._joint_angles = None self._joint_angles = None
self._angle_velocity = None self._angle_velocity = None
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)]) self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
self.start_vel = np.zeros(self.num_links) self.start_vel = np.zeros(self.num_links)
self.weight_matrix_scale = 50 # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer self.weight_matrix_scale = 1
self.dt = 0.01 self.dt = 0.01
self.time_limit = 2 self.time_limit = 2
@ -52,22 +47,14 @@ class HoleReacher(gym.Env):
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape) self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
self.fig = None self.fig = None
rect_1 = patches.Rectangle((-self.num_links, -1),
self.num_links + self.hole_x - self.hole_width / 2, 1,
fill=True, edgecolor='k', facecolor='k')
rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
self.num_links - self.hole_x + self.hole_width / 2, 1,
fill=True, edgecolor='k', facecolor='k')
rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
1 - self.hole_depth,
fill=True, edgecolor='k', facecolor='k')
self.patches = [rect_1, rect_2, rect_3]
@property @property
def end_effector(self): def end_effector(self):
return self._joints[self.num_links].T return self._joints[self.num_links].T
def configure(self, context):
pass
def reset(self): def reset(self):
self._joint_angles = self.start_pos self._joint_angles = self.start_pos
self._angle_velocity = self.start_vel self._angle_velocity = self.start_vel
@ -94,16 +81,16 @@ class HoleReacher(gym.Env):
dist_reward = 0 dist_reward = 0
if not self._is_collided: if not self._is_collided:
if self._steps == 180: if self._steps == 100:
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) dist_reward = np.linalg.norm(self.end_effector - self.via_point)
else: if self._steps == 200:
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole) dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
reward = - dist_reward ** 2 reward = - dist_reward ** 2
reward -= 1e-6 * np.sum(acc**2) reward -= 1e-6 * np.sum(acc**2)
if self._steps == 180: if self._steps == 200:
reward -= 0.1 * np.sum(vel**2) ** 2 reward -= 0.1 * np.sum(vel**2) ** 2
if self._is_collided: if self._is_collided:
@ -129,17 +116,13 @@ class HoleReacher(gym.Env):
self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1] self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
self_collision = False self_collision = False
wall_collision = False
if not self.allow_self_collision: if not self.allow_self_collision:
self_collision = self.check_self_collision(line_points_in_taskspace) self_collision = self.check_self_collision(line_points_in_taskspace)
if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision: if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
self_collision = True self_collision = True
if not self.allow_wall_collision: self._is_collided = self_collision
wall_collision = self.check_wall_collision(line_points_in_taskspace)
self._is_collided = self_collision or wall_collision
def _get_obs(self): def _get_obs(self):
theta = self._joint_angles theta = self._joint_angles
@ -147,7 +130,8 @@ class HoleReacher(gym.Env):
np.cos(theta), np.cos(theta),
np.sin(theta), np.sin(theta),
self._angle_velocity, self._angle_velocity,
self.end_effector - self.bottom_center_of_hole, self.end_effector - self.via_point,
self.end_effector - self.goal_point,
self._steps self._steps
]) ])
@ -237,17 +221,14 @@ class HoleReacher(gym.Env):
if mode == "human": if mode == "human":
plt.cla() plt.cla()
plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}") plt.title(f"Iteration: {self._steps}")
# Arm # Arm
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k') plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
# Add the patch to the Axes
[plt.gca().add_patch(rect) for rect in self.patches]
lim = np.sum(self.link_lengths) + 0.5 lim = np.sum(self.link_lengths) + 0.5
plt.xlim([-lim, lim]) plt.xlim([-lim, lim])
plt.ylim([-1.1, lim]) plt.ylim([-lim, lim])
# plt.draw() # plt.draw()
plt.pause(1e-4) # pushes window to foreground, which is annoying. plt.pause(1e-4) # pushes window to foreground, which is annoying.
# self.fig.canvas.flush_events() # self.fig.canvas.flush_events()
@ -293,14 +274,14 @@ class HoleReacher(gym.Env):
if __name__ == '__main__': if __name__ == '__main__':
nl = 5 nl = 5
render_mode = "human" # "human" or "partial" or "final" render_mode = "human" # "human" or "partial" or "final"
env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1) env = ViaPointReacher(num_links=nl, allow_self_collision=False)
env.reset() env.reset()
# env.render(mode=render_mode) env.render(mode=render_mode)
for i in range(200): for i in range(300):
# objective.load_result("/tmp/cma") # objective.load_result("/tmp/cma")
# test with random actions # test with random actions
ac = 2 * env.action_space.sample() ac = env.action_space.sample()
# ac[0] += np.pi/2 # ac[0] += np.pi/2
obs, rew, d, info = env.step(ac) obs, rew, d, info = env.step(ac)
env.render(mode=render_mode) env.render(mode=render_mode)

View File

@ -1,52 +1,19 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper from alr_envs.classic_control.utils import make_viapointreacher_env
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
from alr_envs.classic_control.hole_reacher import HoleReacher
import numpy as np import numpy as np
if __name__ == "__main__": if __name__ == "__main__":
def make_env(rank, seed=0): n_samples = 10
""" n_cpus = 4
Utility function for multiprocessed env. dim = 25
:param env_id: (str) the environment ID env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
:param num_env: (int) the number of environments you wish to have in subprocesses n_samples=n_samples)
:param seed: (int) the inital seed for RNG
:param rank: (int) index of the subprocess
"""
def _init():
_env = HoleReacher(num_links=5,
allow_self_collision=False,
allow_wall_collision=False,
hole_width=0.15,
hole_depth=1,
hole_x=1)
_env = DmpEnvWrapper(_env, params = np.random.randn(n_samples, dim)
num_dof=5, # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
num_basis=5,
duration=2,
dt=_env.dt,
learn_goal=True,
alpha_phase=2,
start_pos=_env.start_pos,
policy_type="velocity"
)
_env.seed(seed + rank)
return _env
return _init
n_samples = 4
env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
n_samples=n_samples,
context="spawn",
shared_memory=False,
worker=_worker)
# params = np.random.randn(4, 25)
params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
# env.reset() # env.reset()
out = env(params) out = env(params)

View File

@ -1,55 +1,32 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
import numpy as np import numpy as np
if __name__ == "__main__": if __name__ == "__main__":
def make_env(rank, seed=0):
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environments you wish to have in subprocesses
:param seed: (int) the inital seed for RNG
:param rank: (int) index of the subprocess
"""
def _init():
_env = ALRBallInACupEnv()
_env = DmpEnvWrapper(_env,
num_dof=3,
num_basis=8,
duration=3.5,
alpha_phase=3,
dt=_env.dt,
learn_goal=False,
start_pos=_env.start_pos[1::2],
final_pos=_env.start_pos[1::2],
policy_type="motor"
)
_env.seed(seed + rank)
return _env
return _init
dim = 24 dim = 24
n_cpus = 4
n_samples = 10 n_samples = 10
vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)], vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
n_samples=n_samples, n_samples=n_samples)
context="spawn",
shared_memory=False,
worker=_worker)
params = 10 * np.random.randn(n_samples, dim) # params = 10 * np.random.randn(n_samples, dim)
params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
-8.57305795, 2.79806606, -6.38613201, 5.99309385,
-2.05631886, 24.71684748, 14.05989949, -14.60456967,
10.51933419, -2.43715355, -6.0767578 , 13.06498129,
6.18038374, 11.4153859 , 1.40753639, 5.57082387,
9.81989309, 3.60558787, -9.66996754, 14.28519904]])
out = vec_env(params) out = vec_env(params)
print(out)
non_vec_env = make_env(0, 0)() #
non_vec_env = make_simple_env(0, 0)()
params = 10 * np.random.randn(dim) #
# params = 10 * np.random.randn(dim)
out2 = non_vec_env.rollout(params, render=True) out2 = non_vec_env.rollout(params, render=True)