Maximilian Huettenrauch 2021-02-11 12:32:32 +01:00
parent c81378b9e7
commit 13a292f0e0
10 changed files with 116 additions and 155 deletions

View File

@@ -37,7 +37,7 @@ class HoleReacher(gym.Env):
self.start_vel = np.zeros(self.num_links)
self.weight_matrix_scale = 50 # for the HoleReacher, the DMP weights become quite large compared to the values of the goal attractor; this scaling keeps them on a similar scale for the optimizer
self._dt = 0.01
self.dt = 0.01
self.time_limit = 2
action_bound = np.pi * np.ones((self.num_links,))
@@ -82,9 +82,9 @@ class HoleReacher(gym.Env):
a single step with an action in joint velocity space
"""
vel = action
acc = (vel - self._angle_velocity) / self._dt
acc = (vel - self._angle_velocity) / self.dt
self._angle_velocity = vel
self._joint_angles = self._joint_angles + self._dt * self._angle_velocity
self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
self._update_joints()
@@ -113,7 +113,7 @@ class HoleReacher(gym.Env):
self._steps += 1
done = self._steps * self._dt > self.time_limit or self._is_collided
done = self._steps * self.dt > self.time_limit or self._is_collided
return self._get_obs().copy(), reward, done, info
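
The hunks above make the integration step explicit: the private `_dt` attribute becomes the public `dt`, and the velocity action is Euler-integrated into the joint angles. A minimal standalone sketch of that update rule (the function name and the 0.01 s step are illustrative, not part of the repository):

import numpy as np

def integrate_velocity_action(joint_angles, angle_velocity, action, dt=0.01):
    # The action is interpreted as the new joint velocity; acceleration is its
    # finite difference, and the angles advance by one explicit Euler step.
    vel = np.asarray(action)
    acc = (vel - angle_velocity) / dt
    new_angles = joint_angles + dt * vel
    return new_angles, vel, acc

angles, vel, acc = integrate_velocity_action(np.zeros(5), np.zeros(5), 0.1 * np.ones(5))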

View File

@@ -26,7 +26,7 @@ def make_env(rank, seed=0):
num_dof=5,
num_basis=5,
duration=2,
dt=env._dt,
dt=env.dt,
learn_goal=True)
env.seed(seed + rank)
return env
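
Exposing `dt` publicly matters here because the DMP wrapper needs the simulation step size to discretize its reference trajectory. A rough sketch of the assumed relationship (the wrapper internals may differ):

duration = 2                     # seconds covered by the DMP rollout
dt = 0.01                        # environment step size, now readable as env.dt
num_steps = int(duration / dt)   # 200 reference points sampled from the DMP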

View File

@@ -73,7 +73,7 @@ class AlrMujocoEnv(gym.Env):
# observation, _reward, done, _info = self.step(action)
# assert not done
observation = self.reset()
observation = self._get_obs() # TODO: is calling get_obs enough? should we call reset, or even step?
self._set_observation_space(observation)
@@ -82,14 +82,14 @@ class AlrMujocoEnv(gym.Env):
@property
def current_pos(self):
"""
By default returns the joint positions of all simulated objects. May be overriden in subclass.
By default returns the joint positions of all simulated objects. May be overridden in subclass.
"""
return self.sim.data.qpos
@property
def current_vel(self):
"""
By default returns the joint velocities of all simulated objects. May be overriden in subclass.
By default returns the joint velocities of all simulated objects. May be overridden in subclass.
"""
return self.sim.data.qvel
@@ -125,10 +125,15 @@ class AlrMujocoEnv(gym.Env):
# methods to override:
# ----------------------------
def _get_obs(self):
"""Returns the observation.
"""
raise NotImplementedError()
def configure(self, *args, **kwargs):
"""
Helper method to set certain environment properties such as contexts in contextual environments since reset()
doesn't take arguments. Should be called before/after reset(). TODO: before or after?
doesn't take arguments. Should be called before reset().
"""
pass
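
The docstring now fixes the calling order for contextual environments: configure() first, then reset(). A usage sketch, assuming a hypothetical AlrMujocoEnv subclass:

env = MyContextualEnv()        # hypothetical subclass of AlrMujocoEnv
env.configure(context=None)    # hand over the task context (None = default)
obs = env.reset()              # the episode now starts under that context
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())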

View File

@@ -1,12 +1,12 @@
from gym.envs.mujoco import mujoco_env
from gym import utils
import os
import numpy as np
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
from alr_envs.mujoco import alr_mujoco_env
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
import mujoco_py
class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def __init__(self, ):
self._steps = 0
@@ -21,8 +21,12 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
self._q_pos = []
utils.EzPickle.__init__(self)
mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
frame_skip=4)
alr_mujoco_env.AlrMujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), "assets", "ball-in-a-cup_base.xml"),
n_substeps=4)
def configure(self, context):
self.context = context
self.reward_function.reset(context)
def reset_model(self):
start_pos = self.init_qpos.copy()
@@ -30,24 +30,8 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
start_vel = np.zeros_like(start_pos)
self.set_state(start_pos, start_vel)
self._steps = 0
self.reward_function.reset()
self._q_pos = []
def do_simulation(self, ctrl, n_frames):
self.sim.data.ctrl[:] = ctrl
for _ in range(n_frames):
try:
self.sim.step()
except mujoco_py.builder.MujocoException as e:
# print("Error in simulation: " + str(e))
# error = True
# Copy the current torque as if it would have been applied until the end of the trajectory
# for i in range(k + 1, sim_time):
# torques.append(trq)
return True
return False
def step(self, a):
# Apply gravity compensation
if not np.all(self.sim.data.qfrc_applied[:7] == self.sim.data.qfrc_bias[:7]):
@@ -98,6 +86,7 @@ class ALRBallInACupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
if __name__ == "__main__":
env = ALRBallInACupEnv()
env.configure(None)
env.reset()
for i in range(2000):
# objective.load_result("/tmp/cma")
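
For reference, the gravity-compensation check at the top of step() follows the usual mujoco_py pattern: the bias forces MuJoCo computes for the current state are copied into the applied forces of the 7 arm joints so the controller does not have to fight gravity. A standalone sketch (the helper name is illustrative):

import numpy as np

def apply_gravity_compensation(sim, n_joints=7):
    # qfrc_bias holds the gravity/Coriolis forces for the current state;
    # writing it into qfrc_applied cancels them for the first n_joints DoFs.
    if not np.all(sim.data.qfrc_applied[:n_joints] == sim.data.qfrc_bias[:n_joints]):
        sim.data.qfrc_applied[:n_joints] = sim.data.qfrc_bias[:n_joints]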

View File

@@ -26,9 +26,9 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.dists_final = None
self.costs = None
self.reset()
self.reset(None)
def reset(self):
def reset(self, context):
self.ball_traj = np.zeros(shape=(self.sim_time, 3))
self.dists = []
self.dists_final = []
@@ -51,11 +51,12 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
self.ball_traj[step, :] = ball_pos
if self.check_collision(sim):
return -1000, False, True
action_cost = np.sum(np.square(action))
if self.check_collision(sim):
reward = - 1e-5 * action_cost - 1000
return reward, False, True
if step == self.sim_time - 1:
min_dist = np.min(self.dists)
dist_final = self.dists_final[-1]
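
The collision branch above now folds a small action cost into the fixed penalty instead of returning a bare -1000. Numerically (the action values are illustrative):

import numpy as np

action = np.array([0.2, -0.1, 0.05])
action_cost = np.sum(np.square(action))   # = 0.0525
reward = -1e-5 * action_cost - 1000       # ≈ -1000.000000525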

View File

@@ -1,21 +1,16 @@
from alr_envs.mujoco import alr_mujoco_env
from gym import utils, spaces
from gym import utils
import os
import numpy as np
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def __init__(self, reward_function=None):
def __init__(self, n_substeps=4, apply_gravity_comp=True, reward_function=None):
self._steps = 0
self.xml_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets",
"biac_base" + ".xml")
self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / (0.0005 * 4)) # circular dependency.. sim.dt <-> mujocoenv init <-> reward fct
self.reward_function = reward_function(self.sim_steps)
self.start_pos = np.array([0.0, 0.58760536, 0.0, 1.36004913, 0.0, -0.32072943, -1.57])
self.start_vel = np.zeros(7)
@@ -34,8 +29,15 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
utils.EzPickle.__init__(self)
alr_mujoco_env.AlrMujocoEnv.__init__(self,
self.xml_path,
apply_gravity_comp=True,
n_substeps=4)
apply_gravity_comp=apply_gravity_comp,
n_substeps=n_substeps)
self.sim_time = 8 # seconds
self.sim_steps = int(self.sim_time / self.dt)
if reward_function is None:
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
reward_function = BallInACupReward
self.reward_function = reward_function(self.sim_steps)
@property
def current_pos(self):
@@ -47,6 +49,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def configure(self, context):
self.context = context
self.reward_function.reset(context)
def reset_model(self):
init_pos_all = self.init_qpos.copy()
@@ -56,7 +59,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
goal_final_id = self.sim.model._site_name2id["cup_goal_final"]
self._steps = 0
self.reward_function.reset()
self._q_pos = []
self._q_vel = []
@@ -65,38 +67,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
self.set_state(start_pos, init_vel)
# Reset the system
# self.sim.data.qpos[:] = init_pos_all
# self.sim.data.qvel[:] = init_vel
# self.sim.data.qpos[0:7] = init_pos_robot
#
# self.sim.step()
#
# self.sim.data.qpos[:] = init_pos_all
# self.sim.data.qvel[:] = init_vel
# self.sim.data.qpos[0:7] = init_pos_robot
# self.sim.data.body_xpos[ball_id, :] = np.copy(self.sim.data.site_xpos[goal_final_id, :]) - np.array([0., 0., 0.329])
#
# # Stabilize the system around the initial position
# for i in range(0, 500):
# self.sim.data.qpos[7:] = 0.
# self.sim.data.qvel[7:] = 0.
# # self.sim.data.qpos[7] = -0.2
# cur_pos = self.sim.data.qpos[0:7].copy()
# cur_vel = self.sim.data.qvel[0:7].copy()
# trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
# self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
# self.sim.step()
# self.render()
#
# for i in range(0, 500):
# cur_pos = self.sim.data.qpos[0:7].copy()
# cur_vel = self.sim.data.qvel[0:7].copy()
# trq = self.p_gains * (init_pos_robot - cur_pos) + self.d_gains * (np.zeros_like(init_pos_robot) - cur_vel)
# self.sim.data.qfrc_applied[0:7] = trq + self.sim.data.qfrc_bias[:7].copy()
# self.sim.step()
# self.render()
return self._get_obs()
def step(self, a):
@@ -154,7 +124,10 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
if __name__ == "__main__":
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward_simple import BallInACupReward
env = ALRBallInACupEnv(reward_function=BallInACupReward)
env.configure(None)
env.reset()
env.render()
for i in range(4000):
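
The constructor above resolves the old circular dependency by running the base-class __init__ first (which loads the model and defines self.dt = timestep * n_substeps) and only then sizing the reward function. A usage sketch; the 0.002 s step assumes the 0.0005 s model timestep implied by the removed line:

env = ALRBallInACupEnv(n_substeps=4)   # falls back to BallInACupReward (simple variant)
print(env.dt, env.sim_steps)           # e.g. 0.002 and 4000 for the 8 s rollout
env.configure(None)                    # context is forwarded to the reward function
obs = env.reset()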

View File

@@ -7,9 +7,54 @@ import multiprocessing as mp
import sys
def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
assert shared_memory is None
env = env_fn()
parent_pipe.close()
try:
while True:
command, data = pipe.recv()
if command == 'reset':
observation = env.reset()
pipe.send((observation, True))
elif command == 'step':
observation, reward, done, info = env.step(data)
if done:
observation = env.reset()
pipe.send(((observation, reward, done, info), True))
elif command == 'rollout':
rewards = []
infos = []
for p, c in zip(*data):
reward, info = env.rollout(p, c)
rewards.append(reward)
infos.append(info)
pipe.send(((rewards, infos), (True, ) * len(rewards)))
elif command == 'seed':
env.seed(data)
pipe.send((None, True))
elif command == 'close':
env.close()
pipe.send((None, True))
break
elif command == 'idle':
pipe.send((None, True))
elif command == '_check_observation_space':
pipe.send((data == env.observation_space, True))
else:
raise RuntimeError('Received unknown command `{0}`. Must '
'be one of {`reset`, `step`, `rollout`, `seed`, `close`, '
'`idle`, `_check_observation_space`}.'.format(command))
except (KeyboardInterrupt, Exception):
error_queue.put((index,) + sys.exc_info()[:2])
pipe.send((None, False))
finally:
env.close()
class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
def __init__(self, env_fns, n_samples, observation_space=None, action_space=None,
shared_memory=True, copy=True, context=None, daemon=True, worker=None):
shared_memory=False, copy=True, context="spawn", daemon=True, worker=_worker):
super(DmpAsyncVectorEnv, self).__init__(env_fns,
observation_space=observation_space,
action_space=action_space,
@@ -91,7 +136,7 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
self._raise_if_errors(successes)
self._state = AsyncState.DEFAULT
observations_list, rewards, dones, infos = [_flatten_list(r) for r in zip(*results)]
rewards, infos = [_flatten_list(r) for r in zip(*results)]
# for now, we ignore the observations and only return the rewards
@@ -109,55 +154,6 @@ class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
return self.rollout_wait()
def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
assert shared_memory is None
env = env_fn()
parent_pipe.close()
try:
while True:
command, data = pipe.recv()
if command == 'reset':
observation = env.reset()
pipe.send((observation, True))
elif command == 'step':
observation, reward, done, info = env.step(data)
if done:
observation = env.reset()
pipe.send(((observation, reward, done, info), True))
elif command == 'rollout':
observations = []
rewards = []
dones = []
infos = []
for p, c in zip(*data):
observation, reward, done, info = env.rollout(p, c)
observations.append(observation)
rewards.append(reward)
dones.append(done)
infos.append(info)
pipe.send(((observations, rewards, dones, infos), (True, ) * len(rewards)))
elif command == 'seed':
env.seed(data)
pipe.send((None, True))
elif command == 'close':
env.close()
pipe.send((None, True))
break
elif command == 'idle':
pipe.send((None, True))
elif command == '_check_observation_space':
pipe.send((data == env.observation_space, True))
else:
raise RuntimeError('Received unknown command `{0}`. Must '
'be one of {`reset`, `step`, `seed`, `close`, '
'`_check_observation_space`}.'.format(command))
except (KeyboardInterrupt, Exception):
error_queue.put((index,) + sys.exc_info()[:2])
pipe.send((None, False))
finally:
env.close()
def _flatten_obs(obs):
assert isinstance(obs, (list, tuple))
assert len(obs) > 0
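
The relocated _worker is now the default worker and understands a batched 'rollout' command that returns only rewards and infos. From the main process the vectorized environment is driven roughly like this (assuming a make_env(rank, seed) factory that returns DmpEnvWrapper instances, as in the example scripts below):

import numpy as np

n_samples = 4
param_dim = 30    # e.g. num_dof * num_basis weights (+ num_dof goal values if learn_goal=True)
env = DmpAsyncVectorEnv(env_fns=[make_env(rank, seed=0) for rank in range(n_samples)],
                        n_samples=n_samples)
params = np.random.randn(n_samples, param_dim)   # one parameter vector per rollout
out = env(params)                                # roughly (rewards, infos); each worker runs a 'rollout'
print(out)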

View File

@@ -69,15 +69,11 @@ class DmpEnvWrapper(gym.Wrapper):
def __call__(self, params, contexts=None):
params = np.atleast_2d(params)
observations = []
rewards = []
dones = []
infos = []
for p, c in zip(params, contexts):
observation, reward, done, info = self.rollout(p, c)
observations.append(observation)
reward, info = self.rollout(p, c)
rewards.append(reward)
dones.append(done)
infos.append(info)
return np.array(rewards), infos
@@ -116,9 +112,8 @@ class DmpEnvWrapper(gym.Wrapper):
rews = []
infos = []
self.env.configure(context)
self.env.reset()
if context is not None:
self.env.configure(context)
for t, pos_vel in enumerate(zip(trajectory, velocity)):
ac = self.policy.get_action(pos_vel[0], pos_vel[1])
@@ -132,4 +127,4 @@ class DmpEnvWrapper(gym.Wrapper):
reward = np.sum(rews)
return obs, reward, done, info
return reward, info
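
After this change a wrapped environment is called with a batch of DMP parameter vectors and returns only rewards and infos; observations and dones are no longer collected. A call sketch (shapes are illustrative, and wrapped_env stands in for a DmpEnvWrapper instance):

import numpy as np

param_dim = 25                            # e.g. num_dof * num_basis DMP weights
params = np.random.randn(3, param_dim)    # three candidate parameter vectors
contexts = [None, None, None]             # or task contexts for contextual envs
rewards, infos = wrapped_env(params, contexts)
print(np.array(rewards).shape)            # (3,)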

View File

@@ -1,4 +1,4 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapperVel
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
from alr_envs.classic_control.hole_reacher import HoleReacher
import numpy as np
@@ -16,21 +16,25 @@ if __name__ == "__main__":
:param rank: (int) index of the subprocess
"""
def _init():
env = HoleReacher(num_links=5,
allow_self_collision=False,
allow_wall_collision=False,
hole_width=0.15,
hole_depth=1,
hole_x=1)
_env = HoleReacher(num_links=5,
allow_self_collision=False,
allow_wall_collision=False,
hole_width=0.15,
hole_depth=1,
hole_x=1)
env = DmpEnvWrapperVel(env,
num_dof=5,
num_basis=5,
duration=2,
dt=env._dt,
learn_goal=True)
env.seed(seed + rank)
return env
_env = DmpEnvWrapper(_env,
num_dof=5,
num_basis=5,
duration=2,
dt=_env.dt,
learn_goal=True,
alpha_phase=2,
start_pos=_env.start_pos,
policy_type="velocity"
)
_env.seed(seed + rank)
return _env
return _init
n_samples = 4
@@ -45,6 +49,6 @@ if __name__ == "__main__":
params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])
# env.reset()
out = env.rollout(params)
out = env(params)
print(out)

View File

@@ -1,7 +1,6 @@
from alr_envs.utils.dmp_env_wrapper import DmpEnvWrapper
from alr_envs.utils.dmp_async_vec_env import DmpAsyncVectorEnv, _worker
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_simple import ALRBallInACupEnv
from alr_envs.mujoco.ball_in_a_cup.ball_in_a_cup_reward import BallInACupReward
import numpy as np
@@ -17,7 +16,7 @@ if __name__ == "__main__":
:param rank: (int) index of the subprocess
"""
def _init():
_env = ALRBallInACupEnv(BallInACupReward)
_env = ALRBallInACupEnv()
_env = DmpEnvWrapper(_env,
num_dof=3,
@@ -29,7 +28,6 @@ if __name__ == "__main__":
start_pos=_env.start_pos[1::2],
final_pos=_env.start_pos[1::2],
policy_type="motor"
# contextual=True
)
_env.seed(seed + rank)
return _env