updates

commit 13a292f0e0
parent c81378b9e7
@@ -37,7 +37,7 @@ class HoleReacher(gym.Env):
         self.start_vel = np.zeros(self.num_links)
         self.weight_matrix_scale = 50  # for the holereacher, the dmp weights become quite large compared to the values of the goal attractor. this scaling is to ensure they are on similar scale for the optimizer

-        self._dt = 0.01
+        self.dt = 0.01
         self.time_limit = 2

         action_bound = np.pi * np.ones((self.num_links,))
@@ -82,9 +82,9 @@ class HoleReacher(gym.Env):
         a single step with an action in joint velocity space
         """
         vel = action
-        acc = (vel - self._angle_velocity) / self._dt
+        acc = (vel - self._angle_velocity) / self.dt
         self._angle_velocity = vel
-        self._joint_angles = self._joint_angles + self._dt * self._angle_velocity
+        self._joint_angles = self._joint_angles + self.dt * self._angle_velocity

         self._update_joints()

@@ -113,7 +113,7 @@ class HoleReacher(gym.Env):

         self._steps += 1

-        done = self._steps * self._dt > self.time_limit or self._is_collided
+        done = self._steps * self.dt > self.time_limit or self._is_collided

         return self._get_obs().copy(), reward, done, info

@@ -26,7 +26,7 @@ def make_env(rank, seed=0):
                             num_dof=5,
                             num_basis=5,
                             duration=2,
-                            dt=env._dt,
+                            dt=env.dt,
                             learn_goal=True)
        env.seed(seed + rank)
        return env
@@ -73,7 +73,7 @@ class AlrMujocoEnv(gym.Env):
         # observation, _reward, done, _info = self.step(action)
         # assert not done

-        observation = self.reset()
+        observation = self._get_obs()  # TODO: is calling get_obs enough? should we call reset, or even step?

         self._set_observation_space(observation)

@@ -82,14 +82,14 @@ class AlrMujocoEnv(gym.Env):
     @property
     def current_pos(self):
         """
-        By default returns the joint positions of all simulated objects. May be overriden in subclass.
+        By default returns the joint positions of all simulated objects. May be overridden in subclass.
         """
         return self.sim.data.qpos

     @property
     def current_vel(self):
         """
-        By default returns the joint velocities of all simulated objects. May be overriden in subclass.
+        By default returns the joint velocities of all simulated objects. May be overridden in subclass.
         """
         return self.sim.data.qvel

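These properties are the intended extension point: a subclass that should only expose part of the simulation state can override them. A minimal sketch of such an override (the 7-joint slice is illustrative and not part of this commit):

class MyRobotEnv(alr_mujoco_env.AlrMujocoEnv):
    @property
    def current_pos(self):
        # expose only the robot's own joints instead of every simulated object
        return self.sim.data.qpos[0:7].copy()

    @property
    def current_vel(self):
        return self.sim.data.qvel[0:7].copy()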
@@ -125,10 +125,15 @@ class AlrMujocoEnv(gym.Env):
     # methods to override:
     # ----------------------------

+    def _get_obs(self):
+        """Returns the observation.
+        """
+        raise NotImplementedError()
+
     def configure(self, *args, **kwargs):
         """
         Helper method to set certain environment properties such as contexts in contextual environments since reset()
-        doesn't take arguments. Should be called before/after reset(). TODO: before or after?
+        doesn't take arguments. Should be called before reset().
         """
         pass

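The new _get_obs() hook together with the clarified configure() contract implies the following call order for contextual environments. A hedged usage sketch (the environment class and the context value are placeholders):

env = SomeContextualAlrEnv()      # any subclass that implements _get_obs()
env.configure(context)            # set the context first; reset() takes no arguments
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())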
@@ -1,12 +1,12 @@
-from gym.envs.mujoco import mujoco_env
 from gym import utils
 import os
 import numpy as np
-from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
+from alr_envs.mujoco import alr_mujoco_env
+from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
 import mujoco_py


-class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
     def __init__(self, ):
         self._steps = 0

@@ -21,8 +21,12 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         self._q_pos = []

         utils.EzPickle.__init__(self)
-        mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
-                                      frame_skip=4)
+        alr_mujoco_env.AlrMujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
+                                             n_substeps=4)

+    def configure(self, context):
+        self.context = context
+        self.reward_function.reset(context)
+
     def reset_model(self):
         start_pos = self.init_qpos.copy()
@@ -30,24 +34,8 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
         start_vel = np.zeros_like(start_pos)
         self.set_state(start_pos, start_vel)
         self._steps = 0
-        self.reward_function.reset()
         self._q_pos = []

-    def do_simulation(self, ctrl, n_frames):
-        self.sim.data.ctrl[:] = ctrl
-        for _ in range(n_frames):
-            try:
-                self.sim.step()
-            except mujoco_py.builder.MujocoException as e:
-                # print("Error in simulation: " + str(e))
-                # error = True
-                # Copy the current torque as if it would have been applied until the end of the trajectory
-                # for i in range(k + 1, sim_time):
-                #     torques.append(trq)
-                return True
-
-        return False
-
     def step(self, a):
         # Apply gravity compensation
         if not np.all(self.sim.data.qfrc_applied[:7] == self.sim.data.qfrc_bias[:7]):
@@ -98,6 +86,7 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):

 if __name__ == "__main__":
     env = ALRBallInACupEnv()
+    env.configure(None)
     env.reset()
     for i in range(2000):
         # objective.load_result("/tmp/cma")
@@ -26,9 +26,9 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.dists_final = None
         self.costs = None

-        self.reset()
+        self.reset(None)

-    def reset(self):
+    def reset(self, context):
         self.ball_traj = np.zeros(shape=(self.sim_time, 3))
         self.dists = []
         self.dists_final = []
@@ -51,11 +51,12 @@ class BallInACupReward(alr_reward_fct.AlrReward):
         self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
         self.ball_traj[step, :] = ball_pos

-        if self.check_collision(sim):
-            return -1000, False, True
-
         action_cost = np.sum(np.square(action))

+        if self.check_collision(sim):
+            reward = - 1e-5 * action_cost - 1000
+            return reward, False, True
+
         if step == self.sim_time - 1:
             min_dist = np.min(self.dists)
             dist_final = self.dists_final[-1]
@@ -1,21 +1,16 @@
 from alr_envs.mujoco import alr_mujoco_env
-from gym import utils, spaces
+from gym import utils
 import os
 import numpy as np
-from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward


 class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
-    def __init__(self, reward_function=None):
+    def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
         self._steps = 0

         self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
                                      "biac_base" + ".xml")

-        self.sim_time = 8  # seconds
-        self.sim_steps = int(self.sim_time / (0.0005 * 4))  # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct
-        self.reward_function = reward_function(self.sim_steps)
-
         self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
         self.start_vel = np.zeros(7)

@@ -34,8 +29,15 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         utils.EzPickle.__init__(self)
         alr_mujoco_env.AlrMujocoEnv.__init__(self,
                                              self.xml_path,
-                                             apply_gravity_comp=True,
-                                             n_substeps=4)
+                                             apply_gravity_comp=apply_gravity_comp,
+                                             n_substeps=n_substeps)

+        self.sim_time = 8  # seconds
+        self.sim_steps = int(self.sim_time / self.dt)
+        if reward_function is None:
+            from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
+            reward_function = BallInACupReward
+        self.reward_function = reward_function(self.sim_steps)
+
     @property
     def current_pos(self):
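With these defaults the reward function no longer has to be passed in, and sim_steps is derived from self.dt only after the base class has loaded the model, which removes the hard-coded 0.0005 * 4 timestep and the circular dependency mentioned in the deleted comment. A rough construction sketch under those assumptions:

# default: simple reward, 4 substeps, gravity compensation enabled
env = ALRBallInACupEnv()

# or with an explicit reward class; it must accept the number of simulation steps
env = ALRBallInACupEnv(n_substeps=4, reward_function=BallInACupReward)
env.configure(None)   # configure before reset, as in the __main__ block below
env.reset()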
@@ -47,6 +49,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):

     def configure(self, context):
         self.context = context
+        self.reward_function.reset(context)

     def reset_model(self):
         init_pos_all = self.init_qpos.copy()
@@ -56,7 +59,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
         goal_final_id = self.sim.model._site_name2id["cup_goal_final"]

         self._steps = 0
-        self.reward_function.reset()
         self._q_pos = []
         self._q_vel = []

@@ -65,38 +67,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):

         self.set_state(start_pos, init_vel)

-        # Reset the system
-        # self.sim.data.qpos[:] = init_pos_all
-        # self.sim.data.qvel[:] = init_vel
-        # self.sim.data.qpos[0:7] = init_pos_robot
-        #
-        # self.sim.step()
-        #
-        # self.sim.data.qpos[:] = init_pos_all
-        # self.sim.data.qvel[:] = init_vel
-        # self.sim.data.qpos[0:7] = init_pos_robot
-        # self.sim.data.body_xpos[ball_id, :] = np.copy(self.sim.data.site_xpos[goal_final_id, :]) - np.array([0., 0., 0.329])
-        #
-        # # Stabilize the system around the initial position
-        # for i in range(0, 500):
-        #     self.sim.data.qpos[7:] = 0.
-        #     self.sim.data.qvel[7:] = 0.
-        #     # self.sim.data.qpos[7] = -0.2
-        #     cur_pos = self.sim.data.qpos[0:7].copy()
-        #     cur_vel = self.sim.data.qvel[0:7].copy()
-        #     trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
-        #     self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
-        #     self.sim.step()
-        #     self.render()
-        #
-        # for i in range(0, 500):
-        #     cur_pos = self.sim.data.qpos[0:7].copy()
-        #     cur_vel = self.sim.data.qvel[0:7].copy()
-        #     trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
-        #     self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
-        #     self.sim.step()
-        #     self.render()
-
         return self._get_obs()

     def step(self, a):
@@ -154,7 +124,10 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):


 if __name__ == "__main__":
+    from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
+
     env = ALRBallInACupEnv(reward_function=BallInACupReward)
+    env.configure(None)
     env.reset()
     env.render()
     for i in range(4000):
@@ -7,9 +7,54 @@ import multiprocessing as mp
 import sys


+def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
+    assert shared_memory is None
+    env = env_fn()
+    parent_pipe.close()
+    try:
+        while True:
+            command, data = pipe.recv()
+            if command == 'reset':
+                observation = env.reset()
+                pipe.send((observation, True))
+            elif command == 'step':
+                observation, reward, done, info = env.step(data)
+                if done:
+                    observation = env.reset()
+                pipe.send(((observation, reward, done, info), True))
+            elif command == 'rollout':
+                rewards = []
+                infos = []
+                for p, c in zip(*data):
+                    reward, info = env.rollout(p, c)
+                    rewards.append(reward)
+                    infos.append(info)
+                pipe.send(((rewards, infos), (True, ) * len(rewards)))
+            elif command == 'seed':
+                env.seed(data)
+                pipe.send((None, True))
+            elif command == 'close':
+                env.close()
+                pipe.send((None, True))
+                break
+            elif command == 'idle':
+                pipe.send((None, True))
+            elif command == '_check_observation_space':
+                pipe.send((data == env.observation_space, True))
+            else:
+                raise RuntimeError('Received unknown command `{0}`. Must '
+                                   'be one of {`reset`, `step`, `seed`, `close`, '
+                                   '`_check_observation_space`}.'.format(command))
+    except (KeyboardInterrupt, Exception):
+        error_queue.put((index,) + sys.exc_info()[:2])
+        pipe.send((None, False))
+    finally:
+        env.close()
+
+
 class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
     def __init__(self, env_fns, n_samples, observation_space=None, action_space=None,
-                 shared_memory=True, copy=True, context=None, daemon=True, worker=None):
+                 shared_memory=False, copy=True, context="spawn", daemon=True, worker=_worker):
         super(DmpAsyncVectorEnv, self).__init__(env_fns,
                                                 observation_space=observation_space,
                                                 action_space=action_space,
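The new 'rollout' command expects data to be a pair of sequences (parameter sets, contexts); the worker zips them, feeds each pair to env.rollout, and answers with ((rewards, infos), successes). A minimal sketch of that exchange on a single worker connection (illustrative only, not a public API):

# parent side: one chunk of DMP parameters and matching contexts per worker process
pipe.send(('rollout', (params_chunk, contexts_chunk)))
(rewards, infos), successes = pipe.recv()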
@@ -91,7 +136,7 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
         self._raise_if_errors(successes)
         self._state = AsyncState.DEFAULT

-        observations_list, rewards, dones, infos = [_flatten_list(r) for r in zip(*results)]
+        rewards, infos = [_flatten_list(r) for r in zip(*results)]

         # for now, we ignore the observations and only return the rewards

@@ -109,55 +154,6 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
         return self.rollout_wait()


-def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
-    assert shared_memory is None
-    env = env_fn()
-    parent_pipe.close()
-    try:
-        while True:
-            command, data = pipe.recv()
-            if command == 'reset':
-                observation = env.reset()
-                pipe.send((observation, True))
-            elif command == 'step':
-                observation, reward, done, info = env.step(data)
-                if done:
-                    observation = env.reset()
-                pipe.send(((observation, reward, done, info), True))
-            elif command == 'rollout':
-                observations = []
-                rewards = []
-                dones = []
-                infos = []
-                for p, c in zip(*data):
-                    observation, reward, done, info = env.rollout(p, c)
-                    observations.append(observation)
-                    rewards.append(reward)
-                    dones.append(done)
-                    infos.append(info)
-                pipe.send(((observations, rewards, dones, infos), (True, ) * len(rewards)))
-            elif command == 'seed':
-                env.seed(data)
-                pipe.send((None, True))
-            elif command == 'close':
-                env.close()
-                pipe.send((None, True))
-                break
-            elif command == 'idle':
-                pipe.send((None, True))
-            elif command == '_check_observation_space':
-                pipe.send((data == env.observation_space, True))
-            else:
-                raise RuntimeError('Received unknown command `{0}`. Must '
-                                   'be one of {`reset`, `step`, `seed`, `close`, '
-                                   '`_check_observation_space`}.'.format(command))
-    except (KeyboardInterrupt, Exception):
-        error_queue.put((index,) + sys.exc_info()[:2])
-        pipe.send((None, False))
-    finally:
-        env.close()
-
-
 def _flatten_obs(obs):
     assert isinstance(obs, (list, tuple))
     assert len(obs) > 0
@@ -69,15 +69,11 @@ class DmpEnvWrapper(gym.Wrapper):

     def __call__(self, params, contexts=None):
         params = np.atleast_2d(params)
-        observations = []
         rewards = []
-        dones = []
         infos = []
         for p, c in zip(params, contexts):
-            observation, reward, done, info = self.rollout(p, c)
-            observations.append(observation)
+            reward, info = self.rollout(p, c)
             rewards.append(reward)
-            dones.append(done)
             infos.append(info)

         return np.array(rewards), infos
@@ -116,9 +112,8 @@ class DmpEnvWrapper(gym.Wrapper):
         rews = []
         infos = []

+        self.env.configure(context)
         self.env.reset()
-        if context is not None:
-            self.env.configure(context)

         for t, pos_vel in enumerate(zip(trajectory, velocity)):
             ac = self.policy.get_action(pos_vel[0], pos_vel[1])
@@ -132,4 +127,4 @@ class DmpEnvWrapper(gym.Wrapper):

         reward = np.sum(rews)

-        return obs, reward, done, info
+        return reward, info
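Since rollout() now returns only (reward, info), the wrapper's __call__ (see the @@ -69,15 +69,11 @@ hunk above) collects exactly those two values per parameter set. A short usage sketch, assuming env is a DmpEnvWrapper built as in the example scripts; the parameter dimensions are illustrative:

params = 50 * np.random.randn(2, 30)      # one row of DMP weights (plus goal) per rollout
contexts = [None, None]                   # contexts must be iterable alongside params
rewards, infos = env(params, contexts)    # rewards: np.ndarray of shape (2,)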
@@ -1,4 +1,4 @@
-from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
+from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
 from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
 from alr_envs.classic_control.hole_reacher import HoleReacher
 import numpy as np
@@ -16,21 +16,25 @@ if __name__ == "__main__":
        :param rank: (int) index of the subprocess
        """
        def _init():
-            env = HoleReacher(num_links=5,
+            _env = HoleReacher(num_links=5,
                              allow_self_collision=False,
                              allow_wall_collision=False,
                              hole_width=0.15,
                              hole_depth=1,
                              hole_x=1)

-            env = DmpEnvWrapperVel(env,
-                                   num_dof=5,
+            _env = DmpEnvWrapper(_env,
+                                 num_dof=5,
                                  num_basis=5,
                                  duration=2,
-                                  dt=env._dt,
-                                  learn_goal=True)
-            env.seed(seed + rank)
-            return env
+                                 dt=_env.dt,
+                                 learn_goal=True,
+                                 alpha_phase=2,
+                                 start_pos=_env.start_pos,
+                                 policy_type="velocity"
+                                 )
+            _env.seed(seed + rank)
+            return _env
        return _init

    n_samples = 4
@@ -45,6 +49,6 @@ if __name__ == "__main__":
    params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

    # env.reset()
-    out = env.rollout(params)
+    out = env(params)

    print(out)
@@ -1,7 +1,6 @@
 from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
 from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
 from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
-from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
 import numpy as np


@@ -17,7 +16,7 @@ if __name__ == "__main__":
        :param rank: (int) index of the subprocess
        """
        def _init():
-            _env = ALRBallInACupEnv(BallInACupReward)
+            _env = ALRBallInACupEnv()

            _env = DmpEnvWrapper(_env,
                                 num_dof=3,
@@ -29,7 +28,6 @@ if __name__ == "__main__":
                                 start_pos=_env.start_pos[1::2],
                                 final_pos=_env.start_pos[1::2],
                                 policy_type="motor"
-                                 # contextual=True
                                 )
            _env.seed(seed + rank)
            return _env