diff --git a/alr_envs/__init__.py b/alr_envs/__init__.py
index fa73b7a..986265c 100644
--- a/alr_envs/__init__.py
+++ b/alr_envs/__init__.py
@@ -272,12 +272,20 @@ for v in versions:
}
)
-# register(
-# id='HoleReacherDetPMP-v0',
-# entry_point='alr_envs.classic_control.hole_reacher:holereacher_detpmp',
-# # max_episode_steps=1,
-# # TODO: add mp kwargs
-# )
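+    # Deterministic ProMP (DetPMP) variant of the HoleReacher task, registered once per environment version v.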
+ register(
+ id=f'HoleReacherDetPMP-{v}',
+ entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+ kwargs={
+ "name": f"alr_envs:HoleReacher-{v}",
+ "num_dof": 5,
+ "num_basis": 5,
+ "duration": 2,
+ "width": 0.025,
+ "policy_type": "velocity",
+ "weights_scale": 0.2,
+ "zero_start": True
+ }
+ )
# TODO: properly add final_pos
register(
@@ -335,6 +343,41 @@ register(
}
)
+register(
+ id='ALRBallInACupSimpleDetPMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+ kwargs={
+ "name": "alr_envs:ALRBallInACupSimple-v0",
+ "num_dof": 3,
+ "num_basis": 5,
+ "duration": 3.5,
+ "post_traj_time": 4.5,
+ "width": 0.0035,
+ # "off": -0.05,
+ "policy_type": "motor",
+ "weights_scale": 0.2,
+ "zero_start": True,
+ "zero_goal": True
+ }
+)
+
+register(
+ id='ALRBallInACupDetPMP-v0',
+ entry_point='alr_envs.utils.make_env_helpers:make_detpmp_env',
+ kwargs={
+ "name": "alr_envs:ALRBallInACupSimple-v0",
+ "num_dof": 7,
+ "num_basis": 5,
+ "duration": 3.5,
+ "post_traj_time": 4.5,
+ "width": 0.0035,
+ "policy_type": "motor",
+ "weights_scale": 0.2,
+ "zero_start": True,
+ "zero_goal": True
+ }
+)
+
register(
id='ALRBallInACupGoalDMP-v0',
entry_point='alr_envs.utils.make_env_helpers:make_contextual_env',
diff --git a/alr_envs/classic_control/hole_reacher.py b/alr_envs/classic_control/hole_reacher.py
index a4f1f33..730e7bf 100644
--- a/alr_envs/classic_control/hole_reacher.py
+++ b/alr_envs/classic_control/hole_reacher.py
@@ -7,10 +7,10 @@ from gym.utils import seeding
from matplotlib import patches
from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
-class HoleReacherEnv(MPEnv):
+class HoleReacherEnv(AlrEnv):
def __init__(self, n_links: int, hole_x: Union[None, float] = None, hole_depth: Union[None, float] = None,
hole_width: float = 1., random_start: bool = False, allow_self_collision: bool = False,
@@ -71,14 +71,15 @@ class HoleReacherEnv(MPEnv):
A single step with an action in joint velocity space
"""
+ acc = (action - self._angle_velocity) / self.dt
self._angle_velocity = action
- self._joint_angles = self._joint_angles + self.dt * self._angle_velocity
+ self._joint_angles = self._joint_angles + self.dt * self._angle_velocity # + 0.001 * np.random.randn(5)
self._update_joints()
- acc = (action - self._angle_velocity) / self.dt
reward, info = self._get_reward(acc)
info.update({"is_collided": self._is_collided})
+ self.end_effector_traj.append(np.copy(self.end_effector))
self._steps += 1
done = self._is_collided
@@ -101,6 +102,7 @@ class HoleReacherEnv(MPEnv):
self._joints = np.zeros((self.n_links + 1, 2))
self._update_joints()
self._steps = 0
+ self.end_effector_traj = []
return self._get_obs().copy()
diff --git a/alr_envs/classic_control/simple_reacher.py b/alr_envs/classic_control/simple_reacher.py
index 425134d..5b04aad 100644
--- a/alr_envs/classic_control/simple_reacher.py
+++ b/alr_envs/classic_control/simple_reacher.py
@@ -5,10 +5,10 @@ import numpy as np
from gym import spaces
from gym.utils import seeding
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
-class SimpleReacherEnv(MPEnv):
+class SimpleReacherEnv(AlrEnv):
"""
Simple Reaching Task without any physics simulation.
Returns no reward until 150 time steps. This allows the agent to explore the space, but requires precise actions
diff --git a/alr_envs/classic_control/viapoint_reacher.py b/alr_envs/classic_control/viapoint_reacher.py
index 2897f31..8edcc69 100644
--- a/alr_envs/classic_control/viapoint_reacher.py
+++ b/alr_envs/classic_control/viapoint_reacher.py
@@ -6,10 +6,10 @@ import numpy as np
from gym.utils import seeding
from alr_envs.classic_control.utils import check_self_collision
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
-class ViaPointReacher(MPEnv):
+class ViaPointReacher(AlrEnv):
def __init__(self, n_links, random_start: bool = True, via_target: Union[None, Iterable] = None,
target: Union[None, Iterable] = None, allow_self_collision=False, collision_penalty=1000):
diff --git a/alr_envs/mujoco/alr_mujoco_env.py b/alr_envs/mujoco/alr_mujoco_env.py
index edc90b6..8dbdabb 100644
--- a/alr_envs/mujoco/alr_mujoco_env.py
+++ b/alr_envs/mujoco/alr_mujoco_env.py
@@ -1,5 +1,6 @@
from collections import OrderedDict
import os
+from abc import abstractmethod
from gym import error, spaces
@@ -142,18 +143,20 @@ class AlrMujocoEnv(gym.Env):
# methods to override:
# ----------------------------
+ @property
+ @abstractmethod
+ def active_obs(self):
+        """Boolean mask indicating, for each observation entry, whether it is returned in the contextual case.
+        This effectively allows filtering unwanted or unnecessary observations from the full step-based observation.
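+        For example, a mask of np.array([True, True, False]) over a 3-entry observation drops the last entry in the contextual setting.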
+ """
+ return np.ones(self.observation_space.shape, dtype=bool)
+
def _get_obs(self):
"""Returns the observation.
"""
raise NotImplementedError()
- def configure(self, *args, **kwargs):
- """
- Helper method to set certain environment properties such as contexts in contextual environments since reset()
- doesn't take arguments. Should be called before reset().
- """
- pass
-
def reset_model(self):
"""
Reset the robot degrees of freedom (qpos and qvel).
diff --git a/alr_envs/mujoco/ball_in_a_cup/assets/biac_base.xml b/alr_envs/mujoco/ball_in_a_cup/assets/biac_base.xml
index 58f0ac6..9229ad5 100644
--- a/alr_envs/mujoco/ball_in_a_cup/assets/biac_base.xml
+++ b/alr_envs/mujoco/ball_in_a_cup/assets/biac_base.xml
@@ -126,6 +126,7 @@
+
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
index 66461c0..345b3ce 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup.py
@@ -22,7 +22,7 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
self.j_min = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])
self.j_max = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])
- self.context = None
+ self.context = context
utils.EzPickle.__init__(self)
alr_mujoco_env.AlrMujocoEnv.__init__(self,
@@ -45,7 +45,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
else:
raise ValueError("Unknown reward type")
self.reward_function = reward_function(self.sim_steps)
- self.configure(context)
@property
def start_pos(self):
@@ -69,10 +68,6 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def current_vel(self):
return self.sim.data.qvel[0:7].copy()
- def configure(self, context):
- self.context = context
- self.reward_function.reset(context)
-
def reset_model(self):
init_pos_all = self.init_qpos.copy()
init_pos_robot = self._start_pos
@@ -95,26 +90,27 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
reward_ctrl = - np.square(a).sum()
crash = self.do_simulation(a)
- joint_cons_viol = self.check_traj_in_joint_limits()
+ # joint_cons_viol = self.check_traj_in_joint_limits()
self._q_pos.append(self.sim.data.qpos[0:7].ravel().copy())
self._q_vel.append(self.sim.data.qvel[0:7].ravel().copy())
ob = self._get_obs()
- if not crash and not joint_cons_viol:
- reward, success, stop_sim = self.reward_function.compute_reward(a, self.sim, self._steps)
- done = success or self._steps == self.sim_steps - 1 or stop_sim
+ if not crash:
+ reward, success, is_collided = self.reward_function.compute_reward(a, self)
+ done = success or self._steps == self.sim_steps - 1 or is_collided
self._steps += 1
else:
- reward = -1000
+ reward = -2
success = False
+ is_collided = False
done = True
return ob, reward, done, dict(reward_dist=reward_dist,
reward_ctrl=reward_ctrl,
velocity=angular_vel,
traj=self._q_pos, is_success=success,
- is_collided=crash or joint_cons_viol)
+ is_collided=is_collided, sim_crash=crash)
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
@@ -129,6 +125,16 @@ class ALRBallInACupEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
[self._steps],
])
+ # TODO
+ @property
+ def active_obs(self):
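+        # All step-based entries are masked out for now; revisit once the contextual observation is settled (see TODO above).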
+ return np.hstack([
+ [False] * 7, # cos
+ [False] * 7, # sin
+ # [True] * 2, # x-y coordinates of target distance
+ [False] # env steps
+ ])
+
# These functions are for the task with 3 joint actuations
def extend_des_pos(self, des_pos):
des_pos_full = self._start_pos.copy()
diff --git a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
index 13053eb..79987d6 100644
--- a/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
+++ b/alr_envs/mujoco/ball_in_a_cup/ball_in_a_cup_reward_simple.py
@@ -6,7 +6,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
def __init__(self, sim_time):
self.sim_time = sim_time
- self.collision_objects = ["cup_geom1", "cup_geom2", "wrist_palm_link_convex_geom",
+ self.collision_objects = ["cup_geom1", "cup_geom2", "cup_base_contact_below",
+ "wrist_palm_link_convex_geom",
"wrist_pitch_link_convex_decomposition_p1_geom",
"wrist_pitch_link_convex_decomposition_p2_geom",
"wrist_pitch_link_convex_decomposition_p3_geom",
@@ -20,6 +21,8 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.goal_id = None
self.goal_final_id = None
self.collision_ids = None
+ self._is_collided = False
+ self.collision_penalty = 1
self.ball_traj = None
self.dists = None
@@ -36,49 +39,52 @@ class BallInACupReward(alr_reward_fct.AlrReward):
self.action_costs = []
self.cup_angles = []
- def compute_reward(self, action, sim, step, context=None):
- self.ball_id = sim.model._body_name2id["ball"]
- self.ball_collision_id = sim.model._geom_name2id["ball_geom"]
- self.goal_id = sim.model._site_name2id["cup_goal"]
- self.goal_final_id = sim.model._site_name2id["cup_goal_final"]
- self.collision_ids = [sim.model._geom_name2id[name] for name in self.collision_objects]
+ def compute_reward(self, action, env):
+ self.ball_id = env.sim.model._body_name2id["ball"]
+ self.ball_collision_id = env.sim.model._geom_name2id["ball_geom"]
+ self.goal_id = env.sim.model._site_name2id["cup_goal"]
+ self.goal_final_id = env.sim.model._site_name2id["cup_goal_final"]
+ self.collision_ids = [env.sim.model._geom_name2id[name] for name in self.collision_objects]
- ball_in_cup = self.check_ball_in_cup(sim, self.ball_collision_id)
+ ball_in_cup = self.check_ball_in_cup(env.sim, self.ball_collision_id)
# Compute the current distance from the ball to the inner part of the cup
- goal_pos = sim.data.site_xpos[self.goal_id]
- ball_pos = sim.data.body_xpos[self.ball_id]
- goal_final_pos = sim.data.site_xpos[self.goal_final_id]
+ goal_pos = env.sim.data.site_xpos[self.goal_id]
+ ball_pos = env.sim.data.body_xpos[self.ball_id]
+ goal_final_pos = env.sim.data.site_xpos[self.goal_final_id]
self.dists.append(np.linalg.norm(goal_pos - ball_pos))
self.dists_final.append(np.linalg.norm(goal_final_pos - ball_pos))
- self.ball_traj[step, :] = ball_pos
- cup_quat = np.copy(sim.data.body_xquat[sim.model._body_name2id["cup"]])
+ self.ball_traj[env._steps, :] = ball_pos
+ cup_quat = np.copy(env.sim.data.body_xquat[env.sim.model._body_name2id["cup"]])
self.cup_angles.append(np.arctan2(2 * (cup_quat[0] * cup_quat[1] + cup_quat[2] * cup_quat[3]),
1 - 2 * (cup_quat[1]**2 + cup_quat[2]**2)))
action_cost = np.sum(np.square(action))
self.action_costs.append(action_cost)
- if self.check_collision(sim):
- reward = - 1000
- return reward, False, True
+ self._is_collided = self.check_collision(env.sim) or env.check_traj_in_joint_limits()
- if step == self.sim_time - 1:
+ if env._steps == env.sim_steps - 1 or self._is_collided:
t_min_dist = np.argmin(self.dists)
angle_min_dist = self.cup_angles[t_min_dist]
cost_angle = (angle_min_dist - np.pi / 2)**2
min_dist = self.dists[t_min_dist]
dist_final = self.dists_final[-1]
+ min_dist_final = np.min(self.dists_final)
- cost = 0.5 * min_dist + 0.5 * dist_final + 0.01 * cost_angle
- reward = np.exp(-2 * cost) - 1e-3 * action_cost
- success = dist_final < 0.05 and ball_in_cup
+ cost = 0.5 * dist_final + 0.05 * cost_angle # TODO: Increase cost_angle weight # 0.5 * min_dist +
+ # reward = np.exp(-2 * cost) - 1e-2 * action_cost - self.collision_penalty * int(self._is_collided)
+ # reward = - dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+ reward = - dist_final**2 - min_dist_final**2 - 1e-4 * cost_angle - 1e-5 * action_cost - self.collision_penalty * int(self._is_collided)
+ success = dist_final < 0.05 and ball_in_cup and not self._is_collided
+ crash = self._is_collided
else:
- reward = - 1e-3 * action_cost
+ reward = - 1e-5 * action_cost # TODO: increase action_cost weight
success = False
+ crash = False
- return reward, success, False
+ return reward, success, crash
def check_ball_in_cup(self, sim, ball_collision_id):
cup_base_collision_id = sim.model._geom_name2id["cup_base_contact"]
diff --git a/alr_envs/mujoco/beerpong/beerpong.py b/alr_envs/mujoco/beerpong/beerpong.py
index dc6278d..5efc431 100644
--- a/alr_envs/mujoco/beerpong/beerpong.py
+++ b/alr_envs/mujoco/beerpong/beerpong.py
@@ -105,6 +105,7 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
def check_traj_in_joint_limits(self):
return any(self.current_pos > self.j_max) or any(self.current_pos < self.j_min)
+ # TODO
def _get_obs(self):
theta = self.sim.data.qpos.flat[:7]
return np.concatenate([
@@ -114,6 +115,10 @@ class ALRBeerpongEnv(alr_mujoco_env.AlrMujocoEnv, utils.EzPickle):
[self._steps],
])
+    # TODO
+    @property
+    def active_obs(self):
+ pass
+
if __name__ == "__main__":
diff --git a/alr_envs/utils/mp_env_async_sampler.py b/alr_envs/utils/mp_env_async_sampler.py
index e935ba6..5cda41f 100644
--- a/alr_envs/utils/mp_env_async_sampler.py
+++ b/alr_envs/utils/mp_env_async_sampler.py
@@ -40,6 +40,16 @@ def _flatten_list(l):
return [l__ for l_ in l for l__ in l_]
+class DummyDist:
+ def __init__(self, dim):
+ self.dim = dim
+
+ def sample(self, contexts):
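+        # Stand-in for a learned search distribution: draws one standard-normal parameter vector per context.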
+ contexts = np.atleast_2d(contexts)
+ n_samples = contexts.shape[0]
+ return np.random.normal(size=(n_samples, self.dim)), contexts
+
+
class AlrMpEnvSampler:
"""
An asynchronous sampler for non contextual MPWrapper environments. A sampler object can be called with a set of
@@ -100,9 +110,9 @@ class AlrContextualMpEnvSampler:
if __name__ == "__main__":
- env_name = "alr_envs:ALRBallInACupSimpleDMP-v0"
+ env_name = "alr_envs:HoleReacherDetPMP-v1"
n_cpu = 8
- dim = 15
+ dim = 25
n_samples = 10
sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
diff --git a/alr_envs/utils/mps/detpmp_wrapper.py b/alr_envs/utils/mps/detpmp_wrapper.py
index 3f6d1ee..8661781 100644
--- a/alr_envs/utils/mps/detpmp_wrapper.py
+++ b/alr_envs/utils/mps/detpmp_wrapper.py
@@ -2,28 +2,29 @@ import gym
import numpy as np
from mp_lib import det_promp
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
from alr_envs.utils.mps.mp_wrapper import MPWrapper
class DetPMPWrapper(MPWrapper):
- def __init__(self, env: MPEnv, num_dof: int, num_basis: int, width: int, duration: int = 1, dt: float = 0.01,
+ def __init__(self, env: AlrEnv, num_dof: int, num_basis: int, width: float, duration: float = 1, dt: float = 0.01,
post_traj_time: float = 0., policy_type: str = None, weights_scale: float = 1.,
zero_start: bool = False, zero_goal: bool = False, **mp_kwargs):
self.duration = duration # seconds
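+        # Prefer the wrapped environment's control timestep if it defines one, otherwise fall back to the dt argument.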
+ dt = env.dt if hasattr(env, "dt") else dt
+ assert dt is not None
+ self.dt = dt
+
super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, num_basis=num_basis,
width=width, zero_start=zero_start, zero_goal=zero_goal, **mp_kwargs)
- self.dt = dt
-
action_bounds = np.inf * np.ones((self.mp.n_basis * self.mp.n_dof))
self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
-
def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, width: float = None,
- zero_start: bool = False, zero_goal: bool = False):
- pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=0.01,
+ off: float = 0.01, zero_start: bool = False, zero_goal: bool = False):
+ pmp = det_promp.DeterministicProMP(n_basis=num_basis, n_dof=num_dof, width=width, off=off,
zero_start=zero_start, zero_goal=zero_goal)
weights = np.zeros(shape=(num_basis, num_dof))
@@ -32,10 +33,10 @@ class DetPMPWrapper(MPWrapper):
return pmp
def mp_rollout(self, action):
- params = np.reshape(action, (self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
+ params = np.reshape(action, newshape=(self.mp.n_basis, self.mp.n_dof)) * self.weights_scale
self.mp.set_weights(self.duration, params)
_, des_pos, des_vel, _ = self.mp.compute_trajectory(1 / self.dt, 1.)
if self.mp.zero_start:
- des_pos += self.start_pos[None, :]
+ des_pos += self.env.start_pos[None, :]
return des_pos, des_vel
diff --git a/alr_envs/utils/mps/dmp_wrapper.py b/alr_envs/utils/mps/dmp_wrapper.py
index 6bc5bc6..4958a98 100644
--- a/alr_envs/utils/mps/dmp_wrapper.py
+++ b/alr_envs/utils/mps/dmp_wrapper.py
@@ -4,13 +4,13 @@ from mp_lib import dmps
from mp_lib.basis import DMPBasisGenerator
from mp_lib.phase import ExpDecayPhaseGenerator
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
from alr_envs.utils.mps.mp_wrapper import MPWrapper
class DmpWrapper(MPWrapper):
- def __init__(self, env: MPEnv, num_dof: int, num_basis: int,
+ def __init__(self, env: AlrEnv, num_dof: int, num_basis: int,
duration: int = 1, alpha_phase: float = 2., dt: float = None,
learn_goal: bool = False, post_traj_time: float = 0.,
weights_scale: float = 1., goal_scale: float = 1., bandwidth_factor: float = 3.,
@@ -40,7 +40,7 @@ class DmpWrapper(MPWrapper):
super().__init__(env, num_dof, dt, duration, post_traj_time, policy_type, weights_scale, render_mode,
num_basis=num_basis, alpha_phase=alpha_phase, bandwidth_factor=bandwidth_factor)
- action_bounds = np.inf * np.ones((np.prod(self.mp.dmp_weights.shape) + (num_dof if learn_goal else 0)))
+ action_bounds = np.inf * np.ones((np.prod(self.mp.weights.shape) + (num_dof if learn_goal else 0)))
self.action_space = gym.spaces.Box(low=-action_bounds, high=action_bounds, dtype=np.float32)
def initialize_mp(self, num_dof: int, duration: int, dt: float, num_basis: int = 5, alpha_phase: float = 2.,
@@ -51,7 +51,7 @@ class DmpWrapper(MPWrapper):
basis_bandwidth_factor=bandwidth_factor)
dmp = dmps.DMP(num_dof=num_dof, basis_generator=basis_generator, phase_generator=phase_generator,
- num_time_steps=int(duration / dt), dt=dt)
+ duration=duration, dt=dt)
return dmp
@@ -66,7 +66,7 @@ class DmpWrapper(MPWrapper):
goal_pos = self.env.goal_pos
assert goal_pos is not None
- weight_matrix = np.reshape(params, self.mp.dmp_weights.shape) # [num_basis, num_dof]
+ weight_matrix = np.reshape(params, self.mp.weights.shape) # [num_basis, num_dof]
return goal_pos * self.goal_scale, weight_matrix * self.weights_scale
def mp_rollout(self, action):
diff --git a/alr_envs/utils/mps/mp_environments.py b/alr_envs/utils/mps/mp_environments.py
index f397491..60db99b 100644
--- a/alr_envs/utils/mps/mp_environments.py
+++ b/alr_envs/utils/mps/mp_environments.py
@@ -5,7 +5,7 @@ import gym
import numpy as np
-class MPEnv(gym.Env):
+class AlrEnv(gym.Env):
@property
@abstractmethod
diff --git a/alr_envs/utils/mps/mp_wrapper.py b/alr_envs/utils/mps/mp_wrapper.py
index 4c92806..c31072f 100644
--- a/alr_envs/utils/mps/mp_wrapper.py
+++ b/alr_envs/utils/mps/mp_wrapper.py
@@ -3,13 +3,13 @@ from abc import ABC, abstractmethod
import gym
import numpy as np
-from alr_envs.utils.mps.mp_environments import MPEnv
+from alr_envs.utils.mps.mp_environments import AlrEnv
from alr_envs.utils.policies import get_policy_class
class MPWrapper(gym.Wrapper, ABC):
- def __init__(self, env: MPEnv, num_dof: int, dt: float, duration: int = 1, post_traj_time: float = 0.,
+ def __init__(self, env: AlrEnv, num_dof: int, dt: float, duration: float = 1, post_traj_time: float = 0.,
policy_type: str = None, weights_scale: float = 1., render_mode: str = None, **mp_kwargs):
super().__init__(env)
@@ -53,9 +53,6 @@ class MPWrapper(gym.Wrapper, ABC):
return obs, np.array(rewards), dones, infos
- def configure(self, context):
- self.env.configure(context)
-
def reset(self):
return self.env.reset()[self.env.active_obs]
@@ -65,7 +62,7 @@ class MPWrapper(gym.Wrapper, ABC):
if self.post_traj_steps > 0:
trajectory = np.vstack([trajectory, np.tile(trajectory[-1, :], [self.post_traj_steps, 1])])
- velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.num_dimensions))])
+ velocity = np.vstack([velocity, np.zeros(shape=(self.post_traj_steps, self.mp.n_dof))])
# self._trajectory = trajectory
# self._velocity = velocity
@@ -105,7 +102,7 @@ class MPWrapper(gym.Wrapper, ABC):
raise NotImplementedError()
@abstractmethod
- def initialize_mp(self, num_dof: int, duration: int, dt: float, **kwargs):
+ def initialize_mp(self, num_dof: int, duration: float, dt: float, **kwargs):
"""
Create respective instance of MP
Returns:
diff --git a/example.py b/example.py
index 166f38f..af30138 100644
--- a/example.py
+++ b/example.py
@@ -1,6 +1,7 @@
from collections import defaultdict
import gym
import numpy as np
+from alr_envs.utils.mp_env_async_sampler import AlrMpEnvSampler, AlrContextualMpEnvSampler, DummyDist
def example_mujoco():
@@ -22,9 +23,9 @@ def example_mujoco():
obs = env.reset()
-def example_dmp():
+def example_mp(env_name="alr_envs:HoleReacherDMP-v0"):
# env = gym.make("alr_envs:ViaPointReacherDMP-v0")
- env = gym.make("alr_envs:HoleReacherDMP-v0")
+ env = gym.make(env_name)
rewards = 0
# env.render(mode=None)
obs = env.reset()
@@ -79,9 +80,36 @@ def example_async(env_id="alr_envs:HoleReacherDMP-v0", n_cpu=4, seed=int('533D',
print(sample(envs, 16))
+def example_async_sampler(env_name="alr_envs:HoleReacherDetPMP-v1", n_cpu=4):
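+    # Evaluates randomly drawn parameter vectors in parallel across n_cpu worker environments.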
+ n_samples = 10
+
+ sampler = AlrMpEnvSampler(env_name, num_envs=n_cpu)
+ dim = sampler.env.action_space.spaces[0].shape[0]
+
+ thetas = np.random.randn(n_samples, dim) # usually form a search distribution
+
+ _, rewards, __, ___ = sampler(thetas)
+
+ print(rewards)
+
+
+def example_async_contextual_sampler(env_name="alr_envs:SimpleReacherDMP-v1", n_cpu=4):
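+    # Same idea for contextual environments: the sampler queries the (dummy) search distribution for parameters per context.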
+ sampler = AlrContextualMpEnvSampler(env_name, num_envs=n_cpu)
+ dim = sampler.env.action_space.spaces[0].shape[0]
+ dist = DummyDist(dim) # needs a sample function
+
+ n_samples = 10
+ new_samples, new_contexts, obs, new_rewards, done, infos = sampler(dist, n_samples)
+
+ print(new_rewards)
+
+
if __name__ == '__main__':
# example_mujoco()
- # example_dmp()
- example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
- # env = gym.make("alr_envs:HoleReacherDMP-v0", context=0.1)
- # env = gym.make("alr_envs:HoleReacherDMP-v1")
+    # example_mp("alr_envs:SimpleReacherDMP-v1")
+ # example_async("alr_envs:LongSimpleReacherDMP-v0", 4)
+ # example_async_contextual_sampler()
+ # env = gym.make("alr_envs:HoleReacherDetPMP-v1")
+ env_name = "alr_envs:ALRBallInACupSimpleDetPMP-v0"
+ # example_async_sampler(env_name)
+ example_mp(env_name)