removed legacy wrappers

This commit is contained in:
ottofabian 2021-05-17 17:59:28 +02:00
parent 14c60766c2
commit 528b7521b6
7 changed files with 0 additions and 587 deletions

View File

@@ -1,88 +0,0 @@
from alr_envs.utils.policies import get_policy_class
from mp_lib import det_promp
import numpy as np
import gym


class DetPMPEnvWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 num_dof,
                 num_basis,
                 width,
                 off=0.01,
                 start_pos=None,
                 duration=1,
                 dt=0.01,
                 post_traj_time=0.,
                 policy_type=None,
                 weights_scale=1,
                 zero_start=False,
                 zero_goal=False,
                 ):
        super(DetPMPEnvWrapper, self).__init__(env)
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.dim = num_dof * num_basis
        self.pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
                                                zero_start=zero_start, zero_goal=zero_goal)
        weights = np.zeros(shape=(num_basis, num_dof))
        self.pmp.set_weights(duration, weights)
        self.weights_scale = weights_scale

        self.duration = duration
        self.dt = dt
        self.post_traj_steps = int(post_traj_time / dt)

        self.start_pos = start_pos
        self.zero_start = zero_start

        policy_class = get_policy_class(policy_type)
        self.policy = policy_class(env)
    def __call__(self, params, contexts=None):
        params = np.atleast_2d(params)
        if contexts is None:
            # allow calling without contexts; otherwise zip(params, None) would fail
            contexts = [None] * len(params)
        rewards = []
        infos = []
        for p, c in zip(params, contexts):
            reward, info = self.rollout(p, c)
            rewards.append(reward)
            infos.append(info)

        return np.array(rewards), infos

    def rollout(self, params, context=None, render=False):
        """Generates a trajectory from the deterministic ProMP and then runs the usual reset/step loop over it."""
        params = np.reshape(params, newshape=(self.num_basis, self.num_dof)) * self.weights_scale
        self.pmp.set_weights(self.duration, params)
        t, des_pos, des_vel, des_acc = self.pmp.compute_trajectory(1 / self.dt, 1.)
        if self.zero_start:
            des_pos += self.start_pos[None, :]

        if self.post_traj_steps > 0:
            des_pos = np.vstack([des_pos, np.tile(des_pos[-1, :], [self.post_traj_steps, 1])])
            des_vel = np.vstack([des_vel, np.zeros(shape=(self.post_traj_steps, self.num_dof))])

        self._trajectory = des_pos
        self._velocity = des_vel

        rews = []
        infos = []

        self.env.configure(context)
        self.env.reset()

        for t, pos_vel in enumerate(zip(des_pos, des_vel)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
            obs, rew, done, info = self.env.step(ac)
            rews.append(rew)
            infos.append(info)
            if render:
                self.env.render(mode="human")
            if done:
                break

        reward = np.sum(rews)

        return reward, info
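
Since the batched __call__ interface of this removed wrapper is easy to misread, here is a minimal, hedged usage sketch (not part of the commit). SomeReacherEnv, the chosen dimensions, and the "velocity" policy type are illustrative assumptions; the base env is expected to provide configure(), step() and reset() as used in rollout above.

# Sketch only: evaluate a batch of ProMP weight vectors with the removed DetPMPEnvWrapper.
# SomeReacherEnv is a placeholder for any base env exposing configure(), step() and reset().
env = DetPMPEnvWrapper(SomeReacherEnv(), num_dof=5, num_basis=5, width=0.02,
                       duration=2, dt=0.02, policy_type="velocity", weights_scale=0.2)
params = np.random.randn(10, env.dim)   # one flat weight vector per rollout, dim = num_dof * num_basis = 25
rewards, infos = env(params)            # episodic reward per parameter vector; contexts default to None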

View File

@@ -1,173 +0,0 @@
import gym
from gym.error import (AlreadyPendingCallError, NoAsyncCallError)
from gym.vector.utils import concatenate, create_empty_array
from gym.vector.async_vector_env import AsyncState
import numpy as np
import multiprocessing as mp
import sys


def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
    assert shared_memory is None
    env = env_fn()
    parent_pipe.close()
    try:
        while True:
            command, data = pipe.recv()
            if command == 'reset':
                observation = env.reset()
                pipe.send((observation, True))
            elif command == 'step':
                observation, reward, done, info = env.step(data)
                if done:
                    observation = env.reset()
                pipe.send(((observation, reward, done, info), True))
            elif command == 'rollout':
                rewards = []
                infos = []
                for p, c in zip(*data):
                    reward, info = env.rollout(p, c)
                    rewards.append(reward)
                    infos.append(info)
                pipe.send(((rewards, infos), (True, ) * len(rewards)))
            elif command == 'seed':
                env.seed(data)
                pipe.send((None, True))
            elif command == 'close':
                env.close()
                pipe.send((None, True))
                break
            elif command == 'idle':
                pipe.send((None, True))
            elif command == '_check_observation_space':
                pipe.send((data == env.observation_space, True))
            else:
                raise RuntimeError('Received unknown command `{0}`. Must '
                                   'be one of {`reset`, `step`, `rollout`, `seed`, `close`, '
                                   '`idle`, `_check_observation_space`}.'.format(command))
    except (KeyboardInterrupt, Exception):
        error_queue.put((index,) + sys.exc_info()[:2])
        pipe.send((None, False))
    finally:
        env.close()


class DmpAsyncVectorEnv(gym.vector.AsyncVectorEnv):
    def __init__(self, env_fns, n_samples, observation_space=None, action_space=None,
                 shared_memory=False, copy=True, context="spawn", daemon=True, worker=_worker):
        super(DmpAsyncVectorEnv, self).__init__(env_fns,
                                                observation_space=observation_space,
                                                action_space=action_space,
                                                shared_memory=shared_memory,
                                                copy=copy,
                                                context=context,
                                                daemon=daemon,
                                                worker=worker)

        # we need to overwrite the number of samples as we may sample more than num_envs
        self.observations = create_empty_array(self.single_observation_space,
                                               n=n_samples,
                                               fn=np.zeros)

    def __call__(self, params, contexts=None):
        return self.rollout(params, contexts)

    def rollout_async(self, params, contexts):
        """
        Parameters
        ----------
        params : iterable of samples from `action_space`
            List of actions.
        """
        self._assert_is_running()
        if self._state != AsyncState.DEFAULT:
            raise AlreadyPendingCallError('Calling `rollout_async` while waiting '
                                          'for a pending call to `{0}` to complete.'.format(
                                              self._state.value), self._state.value)

        params = np.atleast_2d(params)
        split_params = np.array_split(params, np.minimum(len(params), self.num_envs))
        if contexts is None:
            split_contexts = np.array_split([None, ] * len(params), np.minimum(len(params), self.num_envs))
        else:
            split_contexts = np.array_split(contexts, np.minimum(len(contexts), self.num_envs))

        assert np.all([len(p) == len(c) for p, c in zip(split_params, split_contexts)])
        for pipe, param, context in zip(self.parent_pipes, split_params, split_contexts):
            pipe.send(('rollout', (param, context)))
        for pipe in self.parent_pipes[len(split_params):]:
            pipe.send(('idle', None))

        self._state = AsyncState.WAITING_ROLLOUT
    def rollout_wait(self, timeout=None):
        """
        Parameters
        ----------
        timeout : int or float, optional
            Number of seconds before the call to `rollout_wait` times out. If
            `None`, the call to `rollout_wait` never times out.

        Returns
        -------
        rewards : `np.ndarray` instance (dtype `np.float_`)
            A vector of episodic rewards from the vectorized environment.
        infos : list of dict
            A list of auxiliary diagnostic information.
        """
        self._assert_is_running()
        if self._state != AsyncState.WAITING_ROLLOUT:
            raise NoAsyncCallError('Calling `rollout_wait` without any prior call '
                                   'to `rollout_async`.', AsyncState.WAITING_ROLLOUT.value)

        if not self._poll(timeout):
            self._state = AsyncState.DEFAULT
            raise mp.TimeoutError('The call to `rollout_wait` has timed out after '
                                  '{0} second{1}.'.format(timeout, 's' if timeout > 1 else ''))

        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        results = [r for r in results if r is not None]
        self._raise_if_errors(successes)
        self._state = AsyncState.DEFAULT

        rewards, infos = [_flatten_list(r) for r in zip(*results)]

        # for now, we ignore the observations and only return the rewards
        # if not self.shared_memory:
        #     self.observations = concatenate(observations_list, self.observations,
        #                                     self.single_observation_space)
        # return (deepcopy(self.observations) if self.copy else self.observations,
        #         np.array(rewards), np.array(dones, dtype=np.bool_), infos)

        return np.array(rewards), infos

    def rollout(self, actions, contexts):
        self.rollout_async(actions, contexts)
        return self.rollout_wait()


def _flatten_obs(obs):
    assert isinstance(obs, (list, tuple))
    assert len(obs) > 0

    if isinstance(obs[0], dict):
        keys = obs[0].keys()
        return {k: np.stack([o[k] for o in obs]) for k in keys}
    else:
        return np.stack(obs)


def _flatten_list(l):
    assert isinstance(l, (list, tuple))
    assert len(l) > 0
    assert all([len(l_) > 0 for l_ in l])

    return [l__ for l_ in l for l__ in l_]
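
Since the async rollout protocol above is the core of this removed file, a brief hedged sketch of its intended use may help. It relies on make_holereacher_env from the removed alr_envs/utils/legacy/utils.py shown further below, and the sample counts are arbitrary.

# Sketch only: fan 16 parameter vectors out over 4 worker processes and collect episodic rewards.
env_fns = [make_holereacher_env(rank) for rank in range(4)]
vec_env = DmpAsyncVectorEnv(env_fns, n_samples=16)
params = np.random.randn(16, 30)    # 25 DMP weights + 5 learned goal positions per sample
rewards, infos = vec_env(params)    # same as vec_env.rollout(params, contexts=None)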

View File

@@ -1,125 +0,0 @@
from alr_envs.utils.policies import get_policy_class
from mp_lib.phase import ExpDecayPhaseGenerator
from mp_lib.basis import DMPBasisGenerator
from mp_lib import dmps
import numpy as np
import gym


class DmpEnvWrapper(gym.Wrapper):
    def __init__(self,
                 env,
                 num_dof,
                 num_basis,
                 start_pos=None,
                 final_pos=None,
                 duration=1,
                 dt=0.01,
                 alpha_phase=2,
                 bandwidth_factor=3,
                 learn_goal=False,
                 post_traj_time=0.,
                 policy_type=None,
                 weights_scale=1.,
                 goal_scale=1.,
                 ):
        super(DmpEnvWrapper, self).__init__(env)
        self.num_dof = num_dof
        self.num_basis = num_basis
        self.dim = num_dof * num_basis
        if learn_goal:
            self.dim += num_dof
        self.learn_goal = learn_goal
        self.duration = duration  # seconds
        time_steps = int(duration / dt)
        self.t = np.linspace(0, duration, time_steps)
        self.post_traj_steps = int(post_traj_time / dt)

        phase_generator = ExpDecayPhaseGenerator(alpha_phase=alpha_phase, duration=duration)
        basis_generator = DMPBasisGenerator(phase_generator,
                                            duration=duration,
                                            num_basis=self.num_basis,
                                            basis_bandwidth_factor=bandwidth_factor)

        self.dmp = dmps.DMP(num_dof=num_dof,
                            basis_generator=basis_generator,
                            phase_generator=phase_generator,
                            num_time_steps=time_steps,
                            dt=dt
                            )

        self.dmp.dmp_start_pos = start_pos.reshape((1, num_dof))

        dmp_weights = np.zeros((num_basis, num_dof))
        if learn_goal:
            dmp_goal_pos = np.zeros(num_dof)
        else:
            dmp_goal_pos = final_pos

        self.dmp.set_weights(dmp_weights, dmp_goal_pos)
        self.weights_scale = weights_scale
        self.goal_scale = goal_scale

        policy_class = get_policy_class(policy_type)
        self.policy = policy_class(env)
    def __call__(self, params, contexts=None):
        params = np.atleast_2d(params)
        if contexts is None:
            # allow calling without contexts; otherwise zip(params, None) would fail
            contexts = [None] * len(params)
        rewards = []
        infos = []
        for p, c in zip(params, contexts):
            reward, info = self.rollout(p, c)
            rewards.append(reward)
            infos.append(info)

        return np.array(rewards), infos
    def goal_and_weights(self, params):
        if len(params.shape) > 1:
            assert params.shape[1] == self.dim
        else:
            assert len(params) == self.dim
            params = np.reshape(params, [1, self.dim])

        if self.learn_goal:
            goal_pos = params[0, -self.num_dof:]
            weight_matrix = np.reshape(params[:, :-self.num_dof], [self.num_basis, self.num_dof])
        else:
            goal_pos = self.dmp.dmp_goal_pos.flatten()
            assert goal_pos is not None
            weight_matrix = np.reshape(params, [self.num_basis, self.num_dof])

        return goal_pos * self.goal_scale, weight_matrix * self.weights_scale

    def rollout(self, params, context=None, render=False):
        """ This function generates a trajectory based on a DMP and then does the usual loop over reset and step"""
        goal_pos, weight_matrix = self.goal_and_weights(params)
        self.dmp.set_weights(weight_matrix, goal_pos)
        trajectory, velocity = self.dmp.reference_trajectory(self.t)

        if self.post_traj_steps > 0:
            trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
            velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.num_dof))])

        self._trajectory = trajectory
        self._velocity = velocity

        rews = []
        infos = []

        self.env.configure(context)
        self.env.reset()

        for t, pos_vel in enumerate(zip(trajectory, velocity)):
            ac = self.policy.get_action(pos_vel[0], pos_vel[1])
            obs, rew, done, info = self.env.step(ac)
            rews.append(rew)
            infos.append(info)
            if render:
                self.env.render(mode="human")
            if done:
                break

        reward = np.sum(rews)

        return reward, info
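
One detail of goal_and_weights that is easy to miss: with learn_goal=True the flat parameter vector is the row-major flattened weight matrix followed by the goal position. A small sketch of that layout (shapes only, values arbitrary):

# Sketch only: parameter layout expected by DmpEnvWrapper.goal_and_weights when learn_goal=True.
num_dof, num_basis = 5, 5
weights = np.random.randn(num_basis, num_dof)         # 25 basis weights, reshaped back inside the wrapper
goal = np.random.randn(num_dof)                       # 5 goal joint positions, appended last
params = np.concatenate([weights.flatten(), goal])    # shape (30,), i.e. wrapper.dim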

View File

@@ -1,28 +0,0 @@
from alr_envs.utils.legacy.utils import make_holereacher_env
import numpy as np

if __name__ == "__main__":
    n_samples = 1
    n_cpus = 4
    dim = 30

    # env = DmpAsyncVectorEnv([make_viapointreacher_env(i) for i in range(n_cpus)],
    #                         n_samples=n_samples)

    test_env = make_holereacher_env(0)()

    # params = np.random.randn(n_samples, dim)
    params = np.array([[1.386102, -3.29980525, 4.70402733, 1.3966668, 0.73774902,
                        3.14676681, -4.98644416, 6.20303193, 1.30502127, -0.09330522,
                        7.62656797, -5.76893033, 3.4706711, -0.6944142, -3.33442788,
                        12.31421548, -0.72760271, -6.9090723, 7.02903814, -8.7236836,
                        1.4805914, 0.53185824, -5.46626893, 0.69692163, 13.58472666,
                        0.77199316, 2.02906724, -3.0203244, -1.00533159, -0.57417351]])

    # params = np.hstack([50 * np.random.randn(n_samples, 25), np.tile(np.array([np.pi/2, -np.pi/4, -np.pi/4, -np.pi/4, -np.pi/4]), [n_samples, 1])])

    rew, info = test_env.rollout(params, render=True)
    print(rew)

    # out = env(params)
    # print(out)

View File

@@ -1,28 +0,0 @@
from alr_envs.mujoco.ball_in_a_cup.utils import make_simple_dmp_env
import numpy as np

if __name__ == "__main__":
    dim = 15
    n_cpus = 4

    # n_samples = 10
    #
    # vec_env = DmpAsyncVectorEnv([make_simple_env(i) for i in range(n_cpus)],
    #                             n_samples=n_samples)
    #
    # params = np.tile(1 * np.random.randn(n_samples, dim), (10, 1))
    #
    # rewards, infos = vec_env(params)
    # print(rewards)
    #
    non_vec_env = make_simple_dmp_env(0, 0)()

    # params = 0.5 * np.random.randn(dim)
    params = np.array([-2.63357598, -1.04950296, -0.44330737, 0.52950017, 4.29247739,
                       4.52473661, -0.05685977, -0.76796851, 3.71540499, 1.22631059,
                       2.20412438, 3.91588129, -0.12652723, -3.0788211, 0.56204464])

    out2 = non_vec_env.rollout(params, render=True)
    print(out2)

View File

@@ -1,145 +0,0 @@
import alr_envs.classic_control.hole_reacher as hr
import alr_envs.classic_control.viapoint_reacher as vpr
from alr_envs.utils.mps.dmp_wrapper import DmpWrapper
from alr_envs.utils.mps.detpmp_wrapper import DetPMPWrapper
import numpy as np


def make_viapointreacher_env(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = vpr.ViaPointReacher(n_links=5,
                                   allow_self_collision=False,
                                   collision_penalty=1000)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          alpha_phase=2.5,
                          dt=_env.dt,
                          start_pos=_env.start_pos,
                          learn_goal=False,
                          policy_type="velocity",
                          weights_scale=50)
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_env(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.25,
                                 hole_depth=1,
                                 hole_x=2,
                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          bandwidth_factor=2,
                          dt=_env.dt,
                          learn_goal=True,
                          alpha_phase=2,
                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=0.1
                          )
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_fix_goal_env(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.15,
                                 hole_depth=1,
                                 hole_x=1,
                                 collision_penalty=100)

        _env = DmpWrapper(_env,
                          num_dof=5,
                          num_basis=5,
                          duration=2,
                          dt=_env.dt,
                          learn_goal=False,
                          final_pos=np.array([2.02669572, -1.25966385, -1.51618198, -0.80946476, 0.02012344]),
                          alpha_phase=2,
                          start_pos=_env._start_pos,
                          policy_type="velocity",
                          weights_scale=50,
                          goal_scale=1
                          )
        _env.seed(seed + rank)
        return _env

    return _init


def make_holereacher_env_pmp(rank, seed=0):
    """
    Utility function for a multiprocessed env.

    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for the RNG
    :returns: a function that creates the wrapped environment
    """

    def _init():
        _env = hr.HoleReacherEnv(n_links=5,
                                 allow_self_collision=False,
                                 allow_wall_collision=False,
                                 hole_width=0.15,
                                 hole_depth=1,
                                 hole_x=1,
                                 collision_penalty=1000)

        _env = DetPMPWrapper(_env, num_dof=5, num_basis=5, width=0.02, duration=2, dt=_env.dt, post_traj_time=0,
                             policy_type="velocity", weights_scale=0.2, zero_start=True, zero_goal=False)
        _env.seed(seed + rank)
        return _env

    return _init
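
For completeness, a hedged sketch (not part of the commit) of how these removed factories were consumed; the seed and rank values are arbitrary, and the call pattern mirrors the removed example script above.

# Sketch only: the factories return zero-argument constructors so that each subprocess can build
# its own env instance; calling the result immediately yields a single local env.
constructors = [make_holereacher_env(rank, seed=42) for rank in range(4)]   # nothing constructed yet
local_env = constructors[0]()                                               # built here, seeded with seed + rank
params = np.random.randn(1, 30)        # 25 DMP weights + 5 goal positions, as in the removed example script
reward, info = local_env.rollout(params, render=False)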