Maximilian Huettenrauch 2021-02-15 09:03:19 +01:00
parent 95250af31c
commit 77d0cbd00a
4 changed files with 103 additions and 142 deletions

View File

@@ -1,8 +1,9 @@
from alr_envs.classic_control.hole_reacher import HoleReacher
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
def make_env(rank, seed=0):
def make_viapointreacher_env(rank, seed=0):
"""
Utility function for multiprocessed env.
@@ -14,7 +15,38 @@ def make_env(rank, seed=0):
"""
def _init():
env = HoleReacher(num_links=5,
_env = ViaPointReacher(num_links=5,
allow_self_collision=False,
collision_penalty=1000)
_env = DmpEnvWrapper(_env,
num_dof=5,
num_basis=5,
duration=2,
alpha_phase=2,
dt=_env.dt,
start_pos=_env.start_pos,
learn_goal=False,
policy_type="velocity")
_env.seed(seed + rank)
return _env
return _init
def make_holereacher_env(rank, seed=0):
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environments you wish to have in subprocesses
:param seed: (int) the initial seed for RNG
:param rank: (int) index of the subprocess
:returns: a function that generates an environment
"""
def _init():
_env = HoleReacher(num_links=5,
allow_self_collision=False,
allow_wall_collision=False,
hole_width=0.15,
@@ -22,13 +54,17 @@ def make_env(rank, seed=0):
hole_x=1,
collision_penalty=100000)
env = DmpEnvWrapperVel(env,
_env = DmpEnvWrapper(_env,
num_dof=5,
num_basis=5,
duration=2,
dt=env.dt,
learn_goal=True)
env.seed(seed + rank)
return env
dt=_env.dt,
learn_goal=True,
alpha_phase=2,
start_pos=_env.start_pos,
policy_type="velocity"
)
_env.seed(seed + rank)
return _env
return _init
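For orientation, a minimal usage sketch of one of these factory functions on its own, assuming the factories live in alr_envs.classic_control.utils and that the wrapped env exposes the rollout(params, render=...) interface used in the example scripts below:

import numpy as np
from alr_envs.classic_control.utils import make_holereacher_env

# The factory returns an _init callable; calling it constructs the wrapped env.
env = make_holereacher_env(rank=0, seed=0)()

# One DMP parameter vector. Its length is an assumption: with num_dof=5,
# num_basis=5 and learn_goal=True it would be 5 * 5 basis weights plus 5 goal values.
params = 10 * np.random.randn(30)
out = env.rollout(params, render=True)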

View File

@@ -13,29 +13,24 @@ def intersect(A, B, C, D):
return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
class HoleReacher(gym.Env):
class ViaPointReacher(gym.Env):
def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False,
allow_wall_collision=False, collision_penalty=1000):
self.hole_x = hole_x # x-position of center of hole
self.hole_width = hole_width # width of hole
self.hole_depth = hole_depth # depth of hole
def __init__(self, num_links, allow_self_collision=False,
collision_penalty=1000):
self.num_links = num_links
self.link_lengths = np.ones((num_links, 1))
self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
self.top_center_of_hole = np.hstack([hole_x, 0])
self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0])
self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
self.allow_self_collision = allow_self_collision
self.allow_wall_collision = allow_wall_collision
self.collision_penalty = collision_penalty
self.via_point = np.ones(2)
self.goal_point = np.array((num_links, 0))
self._joints = None
self._joint_angles = None
self._angle_velocity = None
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
self.start_vel = np.zeros(self.num_links)
self.weight_matrix_scale = 50  # for the HoleReacher, the DMP weights become quite large compared to the goal attractor values; this scaling keeps them on a similar scale for the optimizer
self.weight_matrix_scale = 1
self.dt = 0.01
self.time_limit = 2
@@ -52,22 +47,14 @@ class HoleReacher(gym.Env):
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
self.fig = None
rect_1 = patches.Rectangle((-self.num_links, -1),
self.num_links + self.hole_x - self.hole_width / 2, 1,
fill=True, edgecolor='k', facecolor='k')
rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
self.num_links - self.hole_x + self.hole_width / 2, 1,
fill=True, edgecolor='k', facecolor='k')
rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
1 - self.hole_depth,
fill=True, edgecolor='k', facecolor='k')
self.patches = [rect_1, rect_2, rect_3]
@property
def end_effector(self):
return self._joints[self.num_links].T
def configure(self, context):
pass
def reset(self):
self._joint_angles = self.start_pos
self._angle_velocity = self.start_vel
@@ -94,16 +81,16 @@ class HoleReacher(gym.Env):
dist_reward = 0
if not self._is_collided:
if self._steps == 180:
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
else:
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
if self._steps == 100:
dist_reward = np.linalg.norm(self.end_effector - self.via_point)
if self._steps == 200:
dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
reward = - dist_reward ** 2
reward -= 1e-6 * np.sum(acc**2)
if self._steps == 180:
if self._steps == 200:
reward -= 0.1 * np.sum(vel**2) ** 2
if self._is_collided:
@@ -129,17 +116,13 @@ class HoleReacher(gym.Env):
self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
self_collision = False
wall_collision = False
if not self.allow_self_collision:
self_collision = self.check_self_collision(line_points_in_taskspace)
if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
self_collision = True
if not self.allow_wall_collision:
wall_collision = self.check_wall_collision(line_points_in_taskspace)
self._is_collided = self_collision or wall_collision
self._is_collided = self_collision
def _get_obs(self):
theta = self._joint_angles
@@ -147,7 +130,8 @@ class HoleReacher(gym.Env):
np.cos(theta),
np.sin(theta),
self._angle_velocity,
self.end_effector - self.bottom_center_of_hole,
self.end_effector - self.via_point,
self.end_effector - self.goal_point,
self._steps
])
@@ -237,17 +221,14 @@ class HoleReacher(gym.Env):
if mode == "human":
plt.cla()
plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}")
plt.title(f"Iteration: {self._steps}")
# Arm
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
# Add the patch to the Axes
[plt.gca().add_patch(rect) for rect in self.patches]
lim = np.sum(self.link_lengths) + 0.5
plt.xlim([-lim, lim])
plt.ylim([-1.1, lim])
plt.ylim([-lim, lim])
# plt.draw()
plt.pause(1e-4) # pushes window to foreground, which is annoying.
# self.fig.canvas.flush_events()
@@ -293,14 +274,14 @@ class HoleReacher(gym.Env):
if __name__ == '__main__':
nl = 5
render_mode = "human" # "human" or "partial" or "final"
env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1)
env = ViaPointReacher(num_links=nl, allow_self_collision=False)
env.reset()
# env.render(mode=render_mode)
env.render(mode=render_mode)
for i in range(200):
for i in range(300):
# objective.load_result("/tmp/cma")
# test with random actions
ac = 2 * env.action_space.sample()
ac = env.action_space.sample()
# ac[0] += np.pi/2
obs, rew, d, info = env.step(ac)
env.render(mode=render_mode)
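To summarize the new reward schedule above: the distance to the via point is penalized only at step 100 and the distance to the goal only at step 200 (with dt = 0.01 and a 2 s time limit, step 200 is the final step). A condensed sketch with hypothetical function arguments, leaving out the collision penalty branch:

import numpy as np

def viapoint_reward(end_effector, via_point, goal_point, acc, vel, step):
    # distance terms are only active at the via-point step and the final step
    dist_reward = 0.0
    if step == 100:
        dist_reward = np.linalg.norm(end_effector - via_point)
    if step == 200:
        dist_reward = np.linalg.norm(end_effector - goal_point)
    reward = -dist_reward ** 2
    reward -= 1e-6 * np.sum(acc ** 2)  # small acceleration penalty every step
    if step == 200:
        reward -= 0.1 * np.sum(vel ** 2) ** 2  # penalize residual velocity at the end
    return reward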

View File

@@ -1,52 +1,19 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
from alr_envs.classic_control.hole_reacher import HoleReacher
from alr_envs.classic_control.utils import make_viapointreacher_env
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
import numpy as np
if __name__ == "__main__":
def make_env(rank, seed=0):
"""
Utility function for multiprocessed env.
n_samples = 10
n_cpus = 4
dim = 25
:param env_id: (str) the environment ID
:param num_env: (int) the number of environments you wish to have in subprocesses
:param seed: (int) the initial seed for RNG
:param rank: (int) index of the subprocess
"""
def _init():
_env = HoleReacher(num_links=5,
allow_self_collision=False,
allow_wall_collision=False,
hole_width=0.15,
hole_depth=1,
hole_x=1)
env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
n_samples=n_samples)
_env = DmpEnvWrapper(_env,
num_dof=5,
num_basis=5,
duration=2,
dt=_env.dt,
learn_goal=True,
alpha_phase=2,
start_pos=_env.start_pos,
policy_type="velocity"
)
_env.seed(seed + rank)
return _env
return _init
n_samples = 4
env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
n_samples=n_samples,
context="spawn",
shared_memory=False,
worker=_worker)
# params = np.random.randn(4, 25)
params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
params = np.random.randn(n_samples, dim)
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
# env.reset()
out = env(params)
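A note on the parameter dimensionality used here, inferred from the wrapper arguments rather than stated in the diff: with learn_goal=False the DMP parameters are just the basis weights:

num_dof, num_basis = 5, 5
learn_goal = False
dim = num_dof * num_basis + (num_dof if learn_goal else 0)  # 25, matching dim above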

View File

@@ -1,55 +1,32 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
import numpy as np
if __name__ == "__main__":
def make_env(rank, seed=0):
"""
Utility function for multiprocessed env.
:param env_id: (str) the environment ID
:param num_env: (int) the number of environments you wish to have in subprocesses
:param seed: (int) the initial seed for RNG
:param rank: (int) index of the subprocess
"""
def _init():
_env = ALRBallInACupEnv()
_env = DmpEnvWrapper(_env,
num_dof=3,
num_basis=8,
duration=3.5,
alpha_phase=3,
dt=_env.dt,
learn_goal=False,
start_pos=_env.start_pos[1::2],
final_pos=_env.start_pos[1::2],
policy_type="motor"
)
_env.seed(seed + rank)
return _env
return _init
dim = 24
n_cpus = 4
n_samples = 10
vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
n_samples=n_samples,
context="spawn",
shared_memory=False,
worker=_worker)
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
n_samples=n_samples)
params = 10 * np.random.randn(n_samples, dim)
# params = 10 * np.random.randn(n_samples, dim)
params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
-8.57305795, 2.79806606, -6.38613201, 5.99309385,
-2.05631886, 24.71684748, 14.05989949, -14.60456967,
10.51933419, -2.43715355, -6.0767578 , 13.06498129,
6.18038374, 11.4153859 , 1.40753639, 5.57082387,
9.81989309, 3.60558787, -9.66996754, 14.28519904]])
out = vec_env(params)
non_vec_env = make_env(0, 0)()
params = 10 * np.random.randn(dim)
print(out)
#
non_vec_env = make_simple_env(0, 0)()
#
# params = 10 * np.random.randn(dim)
out2 = non_vec_env.rollout(params, render=True)
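The same dimensionality reasoning applies to this script, assuming make_simple_env keeps the num_dof=3, num_basis=8, learn_goal=False configuration of the inline factory it replaces:

num_dof, num_basis = 3, 8
dim = num_dof * num_basis  # 24, matching both dim above and the hard-coded parameter vector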