updates
This commit is contained in:
parent
95250af31c
commit
77d0cbd00a
@ -1,8 +1,9 @@
|
||||
from alr_envs.classic_control.hole_reacher import HoleReacher
|
||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
|
||||
from alr_envs.classic_control.viapoint_reacher import ViaPointReacher
|
||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
||||
|
||||
|
||||
def make_env(rank, seed=0):
|
||||
def make_viapointreacher_env(rank, seed=0):
|
||||
"""
|
||||
Utility function for multiprocessed env.
|
||||
|
||||
@ -14,21 +15,56 @@ def make_env(rank, seed=0):
|
||||
"""
|
||||
|
||||
def _init():
|
||||
env = HoleReacher(num_links=5,
|
||||
allow_self_collision=False,
|
||||
allow_wall_collision=False,
|
||||
hole_width=0.15,
|
||||
hole_depth=1,
|
||||
hole_x=1,
|
||||
collision_penalty=100000)
|
||||
_env = ViaPointReacher(num_links=5,
|
||||
allow_self_collision=False,
|
||||
collision_penalty=1000)
|
||||
|
||||
env = DmpEnvWrapperVel(env,
|
||||
num_dof=5,
|
||||
num_basis=5,
|
||||
duration=2,
|
||||
dt=env.dt,
|
||||
learn_goal=True)
|
||||
env.seed(seed + rank)
|
||||
return env
|
||||
_env = DmpEnvWrapper(_env,
|
||||
num_dof=5,
|
||||
num_basis=5,
|
||||
duration=2,
|
||||
alpha_phase=2,
|
||||
dt=_env.dt,
|
||||
start_pos=_env.start_pos,
|
||||
learn_goal=False,
|
||||
policy_type="velocity")
|
||||
_env.seed(seed + rank)
|
||||
return _env
|
||||
|
||||
return _init
|
||||
|
||||
|
||||
def make_holereacher_env(rank, seed=0):
|
||||
"""
|
||||
Utility function for multiprocessed env.
|
||||
|
||||
:param env_id: (str) the environment ID
|
||||
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||
:param seed: (int) the initial seed for RNG
|
||||
:param rank: (int) index of the subprocess
|
||||
:returns a function that generates an environment
|
||||
"""
|
||||
|
||||
def _init():
|
||||
_env = HoleReacher(num_links=5,
|
||||
allow_self_collision=False,
|
||||
allow_wall_collision=False,
|
||||
hole_width=0.15,
|
||||
hole_depth=1,
|
||||
hole_x=1,
|
||||
collision_penalty=100000)
|
||||
|
||||
_env = DmpEnvWrapper(_env,
|
||||
num_dof=5,
|
||||
num_basis=5,
|
||||
duration=2,
|
||||
dt=_env.dt,
|
||||
learn_goal=True,
|
||||
alpha_phase=2,
|
||||
start_pos=_env.start_pos,
|
||||
policy_type="velocity"
|
||||
)
|
||||
_env.seed(seed + rank)
|
||||
return _env
|
||||
|
||||
return _init
|
||||
|
@ -13,29 +13,24 @@ def intersect(A, B, C, D):
|
||||
return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D)
|
||||
|
||||
|
||||
class HoleReacher(gym.Env):
|
||||
class ViaPointReacher(gym.Env):
|
||||
|
||||
def __init__(self, num_links, hole_x, hole_width, hole_depth, allow_self_collision=False,
|
||||
allow_wall_collision=False, collision_penalty=1000):
|
||||
self.hole_x = hole_x # x-position of center of hole
|
||||
self.hole_width = hole_width # width of hole
|
||||
self.hole_depth = hole_depth # depth of hole
|
||||
def __init__(self, num_links, allow_self_collision=False,
|
||||
collision_penalty=1000):
|
||||
self.num_links = num_links
|
||||
self.link_lengths = np.ones((num_links, 1))
|
||||
self.bottom_center_of_hole = np.hstack([hole_x, -hole_depth])
|
||||
self.top_center_of_hole = np.hstack([hole_x, 0])
|
||||
self.left_wall_edge = np.hstack([hole_x - self.hole_width/2, 0])
|
||||
self.right_wall_edge = np.hstack([hole_x + self.hole_width / 2, 0])
|
||||
self.allow_self_collision = allow_self_collision
|
||||
self.allow_wall_collision = allow_wall_collision
|
||||
self.collision_penalty = collision_penalty
|
||||
|
||||
self.via_point = np.ones(2)
|
||||
self.goal_point = np.array((num_links, 0))
|
||||
|
||||
self._joints = None
|
||||
self._joint_angles = None
|
||||
self._angle_velocity = None
|
||||
self.start_pos = np.hstack([[np.pi/2], np.zeros(self.num_links - 1)])
|
||||
self.start_vel = np.zeros(self.num_links)
|
||||
self.weight_matrix_scale = 50 # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer
|
||||
self.weight_matrix_scale = 1
|
||||
|
||||
self.dt = 0.01
|
||||
self.time_limit = 2
|
||||
@ -52,22 +47,14 @@ class HoleReacher(gym.Env):
|
||||
self.observation_space = gym.spaces.Box(low=-state_bound, high=state_bound, shape=state_bound.shape)
|
||||
|
||||
self.fig = None
|
||||
rect_1 = patches.Rectangle((-self.num_links, -1),
|
||||
self.num_links + self.hole_x - self.hole_width / 2, 1,
|
||||
fill=True, edgecolor='k', facecolor='k')
|
||||
rect_2 = patches.Rectangle((self.hole_x + self.hole_width / 2, -1),
|
||||
self.num_links - self.hole_x + self.hole_width / 2, 1,
|
||||
fill=True, edgecolor='k', facecolor='k')
|
||||
rect_3 = patches.Rectangle((self.hole_x - self.hole_width / 2, -1), self.hole_width,
|
||||
1 - self.hole_depth,
|
||||
fill=True, edgecolor='k', facecolor='k')
|
||||
|
||||
self.patches = [rect_1, rect_2, rect_3]
|
||||
|
||||
@property
|
||||
def end_effector(self):
|
||||
return self._joints[self.num_links].T
|
||||
|
||||
def configure(self, context):
|
||||
pass
|
||||
|
||||
def reset(self):
|
||||
self._joint_angles = self.start_pos
|
||||
self._angle_velocity = self.start_vel
|
||||
@ -94,16 +81,16 @@ class HoleReacher(gym.Env):
|
||||
|
||||
dist_reward = 0
|
||||
if not self._is_collided:
|
||||
if self._steps == 180:
|
||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
||||
else:
|
||||
dist_reward = np.linalg.norm(self.end_effector - self.bottom_center_of_hole)
|
||||
if self._steps == 100:
|
||||
dist_reward = np.linalg.norm(self.end_effector - self.via_point)
|
||||
if self._steps == 200:
|
||||
dist_reward = np.linalg.norm(self.end_effector - self.goal_point)
|
||||
|
||||
reward = - dist_reward ** 2
|
||||
|
||||
reward -= 1e-6 * np.sum(acc**2)
|
||||
|
||||
if self._steps == 180:
|
||||
if self._steps == 200:
|
||||
reward -= 0.1 * np.sum(vel**2) ** 2
|
||||
|
||||
if self._is_collided:
|
||||
@ -129,17 +116,13 @@ class HoleReacher(gym.Env):
|
||||
self._joints[1:, 1] = self._joints[0, 1] + line_points_in_taskspace[:, -1, 1]
|
||||
|
||||
self_collision = False
|
||||
wall_collision = False
|
||||
|
||||
if not self.allow_self_collision:
|
||||
self_collision = self.check_self_collision(line_points_in_taskspace)
|
||||
if np.any(np.abs(self._joint_angles) > np.pi) and not self.allow_self_collision:
|
||||
self_collision = True
|
||||
|
||||
if not self.allow_wall_collision:
|
||||
wall_collision = self.check_wall_collision(line_points_in_taskspace)
|
||||
|
||||
self._is_collided = self_collision or wall_collision
|
||||
self._is_collided = self_collision
|
||||
|
||||
def _get_obs(self):
|
||||
theta = self._joint_angles
|
||||
@ -147,7 +130,8 @@ class HoleReacher(gym.Env):
|
||||
np.cos(theta),
|
||||
np.sin(theta),
|
||||
self._angle_velocity,
|
||||
self.end_effector - self.bottom_center_of_hole,
|
||||
self.end_effector - self.via_point,
|
||||
self.end_effector - self.goal_point,
|
||||
self._steps
|
||||
])
|
||||
|
||||
@ -237,17 +221,14 @@ class HoleReacher(gym.Env):
|
||||
|
||||
if mode == "human":
|
||||
plt.cla()
|
||||
plt.title(f"Iteration: {self._steps}, distance: {self.end_effector - self.bottom_center_of_hole}")
|
||||
plt.title(f"Iteration: {self._steps}")
|
||||
|
||||
# Arm
|
||||
plt.plot(self._joints[:, 0], self._joints[:, 1], 'ro-', markerfacecolor='k')
|
||||
|
||||
# Add the patch to the Axes
|
||||
[plt.gca().add_patch(rect) for rect in self.patches]
|
||||
|
||||
lim = np.sum(self.link_lengths) + 0.5
|
||||
plt.xlim([-lim, lim])
|
||||
plt.ylim([-1.1, lim])
|
||||
plt.ylim([-lim, lim])
|
||||
# plt.draw()
|
||||
plt.pause(1e-4) # pushes window to foreground, which is annoying.
|
||||
# self.fig.canvas.flush_events()
|
||||
@ -293,14 +274,14 @@ class HoleReacher(gym.Env):
|
||||
if __name__ == '__main__':
|
||||
nl = 5
|
||||
render_mode = "human" # "human" or "partial" or "final"
|
||||
env = HoleReacher(num_links=nl, allow_self_collision=False, allow_wall_collision=False, hole_width=0.15, hole_depth=1, hole_x=1)
|
||||
env = ViaPointReacher(num_links=nl, allow_self_collision=False)
|
||||
env.reset()
|
||||
# env.render(mode=render_mode)
|
||||
env.render(mode=render_mode)
|
||||
|
||||
for i in range(200):
|
||||
for i in range(300):
|
||||
# objective.load_result("/tmp/cma")
|
||||
# test with random actions
|
||||
ac = 2 * env.action_space.sample()
|
||||
ac = env.action_space.sample()
|
||||
# ac[0] += np.pi/2
|
||||
obs, rew, d, info = env.step(ac)
|
||||
env.render(mode=render_mode)
|
||||
|
@ -1,52 +1,19 @@
|
||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
|
||||
from alr_envs.classic_control.hole_reacher import HoleReacher
|
||||
from alr_envs.classic_control.utils import make_viapointreacher_env
|
||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
|
||||
import numpy as np
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
def make_env(rank, seed=0):
|
||||
"""
|
||||
Utility function for multiprocessed env.
|
||||
n_samples = 10
|
||||
n_cpus = 4
|
||||
dim = 25
|
||||
|
||||
:param env_id: (str) the environment ID
|
||||
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||
:param seed: (int) the inital seed for RNG
|
||||
:param rank: (int) index of the subprocess
|
||||
"""
|
||||
def _init():
|
||||
_env = HoleReacher(num_links=5,
|
||||
allow_self_collision=False,
|
||||
allow_wall_collision=False,
|
||||
hole_width=0.15,
|
||||
hole_depth=1,
|
||||
hole_x=1)
|
||||
env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
|
||||
n_samples=n_samples)
|
||||
|
||||
_env = DmpEnvWrapper(_env,
|
||||
num_dof=5,
|
||||
num_basis=5,
|
||||
duration=2,
|
||||
dt=_env.dt,
|
||||
learn_goal=True,
|
||||
alpha_phase=2,
|
||||
start_pos=_env.start_pos,
|
||||
policy_type="velocity"
|
||||
)
|
||||
_env.seed(seed + rank)
|
||||
return _env
|
||||
return _init
|
||||
|
||||
n_samples = 4
|
||||
|
||||
env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
|
||||
n_samples=n_samples,
|
||||
context="spawn",
|
||||
shared_memory=False,
|
||||
worker=_worker)
|
||||
|
||||
# params = np.random.randn(4, 25)
|
||||
params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||
params = np.random.randn(n_samples, dim)
|
||||
# params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
|
||||
|
||||
# env.reset()
|
||||
out = env(params)
|
||||
|
@ -1,55 +1,32 @@
|
||||
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
|
||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
|
||||
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
|
||||
from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_env
|
||||
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv
|
||||
import numpy as np
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
def make_env(rank, seed=0):
|
||||
"""
|
||||
Utility function for multiprocessed env.
|
||||
|
||||
:param env_id: (str) the environment ID
|
||||
:param num_env: (int) the number of environments you wish to have in subprocesses
|
||||
:param seed: (int) the inital seed for RNG
|
||||
:param rank: (int) index of the subprocess
|
||||
"""
|
||||
def _init():
|
||||
_env = ALRBallInACupEnv()
|
||||
|
||||
_env = DmpEnvWrapper(_env,
|
||||
num_dof=3,
|
||||
num_basis=8,
|
||||
duration=3.5,
|
||||
alpha_phase=3,
|
||||
dt=_env.dt,
|
||||
learn_goal=False,
|
||||
start_pos=_env.start_pos[1::2],
|
||||
final_pos=_env.start_pos[1::2],
|
||||
policy_type="motor"
|
||||
)
|
||||
_env.seed(seed + rank)
|
||||
return _env
|
||||
return _init
|
||||
|
||||
dim = 24
|
||||
n_cpus = 4
|
||||
|
||||
n_samples = 10
|
||||
|
||||
vec_env = DmpAsyncVectorEnv([make_env(i) for i in range(4)],
|
||||
n_samples=n_samples,
|
||||
context="spawn",
|
||||
shared_memory=False,
|
||||
worker=_worker)
|
||||
vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
|
||||
n_samples=n_samples)
|
||||
|
||||
params = 10 * np.random.randn(n_samples, dim)
|
||||
# params = 10 * np.random.randn(n_samples, dim)
|
||||
params = np.array([[ -4.51280364, 24.43701373, 15.73282129, -12.13020392,
|
||||
-8.57305795, 2.79806606, -6.38613201, 5.99309385,
|
||||
-2.05631886, 24.71684748, 14.05989949, -14.60456967,
|
||||
10.51933419, -2.43715355, -6.0767578 , 13.06498129,
|
||||
6.18038374, 11.4153859 , 1.40753639, 5.57082387,
|
||||
9.81989309, 3.60558787, -9.66996754, 14.28519904]])
|
||||
|
||||
out = vec_env(params)
|
||||
|
||||
non_vec_env = make_env(0, 0)()
|
||||
|
||||
params = 10 * np.random.randn(dim)
|
||||
print(out)
|
||||
#
|
||||
non_vec_env = make_simple_env(0, 0)()
|
||||
#
|
||||
# params = 10 * np.random.randn(dim)
|
||||
|
||||
out2 = non_vec_env.rollout(params, render=True)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user